https://github.com/ranapratap55 updated https://github.com/llvm/llvm-project/pull/140210
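For orientation before the patches: patch 1 prototypes a single placeholder builtin whose codegen hard-codes dmask = 1 and tfe/cpol = 0; patch 2 replaces it with one builtin per llvm.amdgcn.image.load.* intrinsic shape, taking dmask, the coordinates, the <8 x i32> resource descriptor, and the tfe/cpol controls as explicit arguments. A minimal usage sketch of the patch-2 form follows — it is not part of the patches, the function names are invented, and only the builtin signatures are taken from the diffs below:

  /* Compile for an AMDGPU target, e.g.
       clang -target amdgcn-amd-amdhsa -nogpulib -S sketch.c
     dmask and the trailing tfe/cpol arguments must be integer constant
     expressions; the SemaAMDGPU check added in patch 2 rejects
     non-constant values there. */
  typedef int int8 __attribute__((ext_vector_type(8)));
  typedef float float4 __attribute__((ext_vector_type(4)));

  /* 2D single-channel load: (dmask, s, t, rsrc, tfe, cpol). */
  float load_r(int s, int t, int8 rsrc) {
    return __builtin_amdgcn_image_load_2d_f32_i32(1, s, t, rsrc, 0, 0);
  }

  /* Mipmapped 2D four-channel load: the mip level is passed after the
     coordinates, before the resource descriptor; dmask 15 requests all
     four components. */
  float4 load_rgba_mip(int s, int t, int mip, int8 rsrc) {
    return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(15, s, t, mip,
                                                        rsrc, 0, 0);
  }
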
>From d1571dd53f157c9d4180c51e709d9bed0ba00136 Mon Sep 17 00:00:00 2001
From: ranapratap55 <ranapratapreddy.nimmakay...@amd.com>
Date: Fri, 16 May 2025 12:50:09 +0530
Subject: [PATCH 1/2] [WIP][AMDGPU] Support type-inferring image load/store
 builtins for AMDGPU

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |  5 +++
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp   | 24 ++++++++++++++
 .../test/CodeGen/builtins-image-load-2d-f32.c | 31 +++++++++++++++++++
 3 files changed, 60 insertions(+)
 create mode 100644 clang/test/CodeGen/builtins-image-load-2d-f32.c

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 39fef9e4601f8..67045809fa726 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -635,5 +635,10 @@ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "ssssIUi", "nc", "bitop3-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
 
+//===----------------------------------------------------------------------===//
+// Image builtins
+//===----------------------------------------------------------------------===//
+BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiV8i", "n")
+
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index ad012d98635ff..15f5cd89beaa9 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -683,6 +683,30 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateInsertElement(I0, A, 1);
   }
+  case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: {
+    llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext());
+    llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u);
+
+    llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1);
+    llvm::Value *arg0 = EmitScalarExpr(E->getArg(0));
+    llvm::Value *arg1 = EmitScalarExpr(E->getArg(1));
+    llvm::Value *arg2 = EmitScalarExpr(E->getArg(2));
+    llvm::Value *imm1 = llvm::ConstantInt::get(IntTy, 0);
+    llvm::Value *imm2 = llvm::ConstantInt::get(IntTy, 0);
+
+    SmallVector<Value *, 6> ArgTys;
+    ArgTys.push_back(imm0);
+    ArgTys.push_back(arg0);
+    ArgTys.push_back(arg1);
+    ArgTys.push_back(arg2);
+    ArgTys.push_back(imm1);
+    ArgTys.push_back(imm2);
+
+    llvm::CallInst *Call =
+        Builder.CreateIntrinsic(RetTy, Intrinsic::amdgcn_image_load_2d, ArgTys);
+
+    return Call;
+  }
   case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
   case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
     llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8);
diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c b/clang/test/CodeGen/builtins-image-load-2d-f32.c
new file mode 100644
index 0000000000000..78dab461c1f38
--- /dev/null
+++ b/clang/test/CodeGen/builtins-image-load-2d-f32.c
@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+
+typedef int v8i __attribute__((ext_vector_type(8)));
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_2d(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 0, i32 0)
+// CHECK-NEXT: ret float [[TMP3]]
+//
+float test_builtin_image_load_2d(float f32, int i32, v8i veci32) {
+
+  return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32);
+}
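Before reading patch 2: it drops the placeholder builtin above in favor of one builtin per llvm.amdgcn.image.load.* shape, and adds SemaAMDGPU::checkImageImmArgFunctionCall, which requires the dmask argument and the trailing tfe/cpol arguments to be integer constant expressions. A hypothetical snippet (illustrative only, not taken from the patch's SemaOpenCL test) of the kind of call the new check rejects:

  typedef int int8 __attribute__((ext_vector_type(8)));

  float bad_cpol(int s, int t, int8 rsrc, int cpol) {
    /* cpol is a runtime value, not an integer constant expression, so
       Sema reports (roughly) "argument to
       '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant
       integer". */
    return __builtin_amdgcn_image_load_2d_f32_i32(1, s, t, rsrc, 0, cpol);
  }
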
>From 7168d19ca478e6b3991011deae04584c6c11439a Mon Sep 17 00:00:00 2001
From: ranapratap55 <ranapratapreddy.nimmakay...@amd.com>
Date: Wed, 18 Jun 2025 10:55:22 +0530
Subject: [PATCH 2/2] [AMDGPU] Add builtins for image load and Sema checking
 for them

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def  |  33 +-
 clang/include/clang/Sema/SemaAMDGPU.h         |   2 +
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp   | 216 +++++-
 clang/lib/Sema/SemaAMDGPU.cpp                 |  42 +
 .../test/CodeGen/builtins-image-load-2d-f32.c | 715 +++++++++++++++++-
 .../builtins-image-load-2d-f32-param.cl       | 132 ++++
 6 files changed, 1105 insertions(+), 35 deletions(-)
 create mode 100644 clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 67045809fa726..1ae461676b034 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -636,9 +636,36 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f1
 TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
 
 //===----------------------------------------------------------------------===//
-// Image builtins
-//===----------------------------------------------------------------------===//
-BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiV8i", "n")
+// Image load builtins
+//===----------------------------------------------------------------------===//
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f32_i32, "V4fiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1d_v4f16_i32, "V4hiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f32_i32, "V4fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_1darray_v4f16_i32, "V4hiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_f32_i32, "fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f32_i32, "V4fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2d_v4f16_i32, "V4hiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_f32_i32, "fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f32_i32, "V4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_2darray_v4f16_i32, "V4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f32_i32, "V4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_3d_v4f16_i32, "V4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f32_i32, "V4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_cube_v4f16_i32, "V4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f32_i32, "V4fiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1d_v4f16_i32, "V4hiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f32_i32, "V4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_1darray_v4f16_i32, "V4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_f32_i32, "fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f32_i32, "V4fiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2d_v4f16_i32, "V4hiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_f32_i32, "fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f32_i32, "V4fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_2darray_v4f16_i32, "V4hiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f32_i32, "V4fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_3d_v4f16_i32, "V4hiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f32_i32, "V4fiiiiiV8iii", "nc", "")
+TARGET_BUILTIN(__builtin_amdgcn_image_load_mip_cube_v4f16_i32, "V4hiiiiiV8iii", "nc", "")
 
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/clang/include/clang/Sema/SemaAMDGPU.h b/clang/include/clang/Sema/SemaAMDGPU.h
index d62c9bb65fadb..673f2719fe227 100644
--- a/clang/include/clang/Sema/SemaAMDGPU.h
+++ b/clang/include/clang/Sema/SemaAMDGPU.h
@@ -29,6 +29,8 @@ class SemaAMDGPU : public SemaBase {
   bool checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs,
                                unsigned NumDataArgs);
 
+  bool checkImageImmArgFunctionCall(CallExpr *TheCall, unsigned ArgCount);
+
   /// Create an AMDGPUWavesPerEUAttr attribute.
AMDGPUFlatWorkGroupSizeAttr * CreateAMDGPUFlatWorkGroupSizeAttr(const AttributeCommonInfo &CI, Expr *Min, diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 15f5cd89beaa9..ef1cebf39d7d8 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -683,27 +683,203 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateInsertElement(I0, A, 1); } - case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: { - llvm::Type *RetTy = llvm::Type::getFloatTy(Builder.getContext()); - llvm::Type *IntTy = llvm::IntegerType::get(Builder.getContext(), 32u); - - llvm::Value *imm0 = llvm::ConstantInt::get(IntTy, 1); - llvm::Value *arg0 = EmitScalarExpr(E->getArg(0)); - llvm::Value *arg1 = EmitScalarExpr(E->getArg(1)); - llvm::Value *arg2 = EmitScalarExpr(E->getArg(2)); - llvm::Value *imm1 = llvm::ConstantInt::get(IntTy, 0); - llvm::Value *imm2 = llvm::ConstantInt::get(IntTy, 0); - - SmallVector<Value *, 6> ArgTys; - ArgTys.push_back(imm0); - ArgTys.push_back(arg0); - ArgTys.push_back(arg1); - ArgTys.push_back(arg2); - ArgTys.push_back(imm1); - ArgTys.push_back(imm2); + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: { + llvm::Type *RetTy = nullptr; + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: + RetTy = llvm::Type::getFloatTy(Builder.getContext()); + break; + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: + case 
AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: + RetTy = + FixedVectorType::get(llvm::Type::getFloatTy(Builder.getContext()), 4); + break; + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: + RetTy = + FixedVectorType::get(llvm::Type::getHalfTy(Builder.getContext()), 4); + break; + } - llvm::CallInst *Call = - Builder.CreateIntrinsic(RetTy, Intrinsic::amdgcn_image_load_2d, ArgTys); + llvm::Value *Dmask = EmitScalarExpr(E->getArg(0)); + llvm::Value *S = EmitScalarExpr(E->getArg(1)); + llvm::Value *T = EmitScalarExpr(E->getArg(2)); + llvm::Value *Slice; + llvm::Value *Mip; + llvm::Value *Rsrc; + llvm::Value *Tfe; + llvm::Value *Cpol; + + SmallVector<Value *, 10> ArgTys; + + Intrinsic::ID IID; + llvm::CallInst *Call; + + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: { + Rsrc = EmitScalarExpr(E->getArg(2)); + Tfe = EmitScalarExpr(E->getArg(3)); + Cpol = EmitScalarExpr(E->getArg(4)); + + ArgTys = {Dmask, S, Rsrc, Tfe, Cpol}; + IID = Intrinsic::amdgcn_image_load_1d; + Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys); + break; + } + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: { + Slice = EmitScalarExpr(E->getArg(2)); + Rsrc = EmitScalarExpr(E->getArg(3)); + Tfe = EmitScalarExpr(E->getArg(4)); + Cpol = EmitScalarExpr(E->getArg(5)); + + ArgTys = {Dmask, S, Slice, Rsrc, Tfe, Cpol}; + IID = Intrinsic::amdgcn_image_load_1darray; + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: + IID = Intrinsic::amdgcn_image_load_mip_1d; + break; + } + Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys); + break; + } + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: { + Rsrc = EmitScalarExpr(E->getArg(3)); + Tfe = EmitScalarExpr(E->getArg(4)); + Cpol = EmitScalarExpr(E->getArg(5)); + + ArgTys = {Dmask, S, T, Rsrc, Tfe, Cpol}; + IID = Intrinsic::amdgcn_image_load_2d; + Call = 
Builder.CreateIntrinsic(RetTy, IID, ArgTys); + break; + } + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: { + Slice = EmitScalarExpr(E->getArg(3)); + Rsrc = EmitScalarExpr(E->getArg(4)); + Tfe = EmitScalarExpr(E->getArg(5)); + Cpol = EmitScalarExpr(E->getArg(6)); + + ArgTys = {Dmask, S, T, Slice, Rsrc, Tfe, Cpol}; + IID = Intrinsic::amdgcn_image_load_2darray; + + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: + IID = Intrinsic::amdgcn_image_load_3d; + break; + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: + IID = Intrinsic::amdgcn_image_load_cube; + break; + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: + IID = Intrinsic::amdgcn_image_load_mip_1darray; + break; + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: + IID = Intrinsic::amdgcn_image_load_mip_2d; + break; + } + Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys); + break; + } + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: { + Slice = EmitScalarExpr(E->getArg(3)); + Mip = EmitScalarExpr(E->getArg(4)); + Rsrc = EmitScalarExpr(E->getArg(5)); + Tfe = EmitScalarExpr(E->getArg(6)); + Cpol = EmitScalarExpr(E->getArg(7)); + + ArgTys = {Dmask, S, T, Slice, Mip, Rsrc, Tfe, Cpol}; + IID = Intrinsic::amdgcn_image_load_mip_2darray; + switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: + IID = Intrinsic::amdgcn_image_load_mip_3d; + break; + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: + IID = Intrinsic::amdgcn_image_load_mip_cube; + break; + } + Call = Builder.CreateIntrinsic(RetTy, IID, ArgTys); + break; + } + } return Call; } diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index a6366aceec2a6..530d0e7553604 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -83,6 +83,38 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_update_dpp: { return checkMovDPPFunctionCall(TheCall, 6, 2); } + 
case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32: + case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: { + unsigned ArgCount = TheCall->getNumArgs() - 1; + + return checkImageImmArgFunctionCall(TheCall, ArgCount); + } default: return false; } @@ -128,6 +160,16 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, return false; } +bool SemaAMDGPU::checkImageImmArgFunctionCall(CallExpr *TheCall, + unsigned ArgCount) { + llvm::APSInt Result; + if (!(SemaRef.BuiltinConstantArg(TheCall, 0, Result)) && + !(SemaRef.BuiltinConstantArg(TheCall, ArgCount, Result)) && + !(SemaRef.BuiltinConstantArg(TheCall, (ArgCount - 1), Result))) + return false; + return true; +} + bool SemaAMDGPU::checkMovDPPFunctionCall(CallExpr *TheCall, unsigned NumArgs, unsigned NumDataArgs) { assert(NumDataArgs <= 2); diff --git a/clang/test/CodeGen/builtins-image-load-2d-f32.c b/clang/test/CodeGen/builtins-image-load-2d-f32.c index 78dab461c1f38..aee97af37aaf0 100644 --- a/clang/test/CodeGen/builtins-image-load-2d-f32.c +++ b/clang/test/CodeGen/builtins-image-load-2d-f32.c @@ -1,31 +1,722 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa %s -emit-llvm -o - | FileCheck %s -#pragma OPENCL EXTENSION cl_khr_fp64:enable - -typedef int v8i __attribute__((ext_vector_type(8))); +typedef int int8 __attribute__((ext_vector_type(8))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef _Float16 half; +typedef half half4 __attribute__((ext_vector_type(4))); // CHECK-LABEL: define dso_local float @test_builtin_image_load_2d( -// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VECI32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef 
[[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) // CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-NEXT: [[VECI32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr // CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr // CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr -// CHECK-NEXT: [[VECI32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VECI32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr // CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 // CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: store <8 x i32> [[VECI32]], ptr [[VECI32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VECI32_ADDR_ASCAST]], align 32 -// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 0, i32 0) +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 12, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 106, i32 103) // CHECK-NEXT: ret float [[TMP3]] // -float test_builtin_image_load_2d(float f32, int i32, v8i veci32) { +float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_f32_i32(12, i32, i32, vec8i32, 106, 103); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> 
@llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP3]] +// +half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_2darray( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call float 
@llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP4]] +// +float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_2darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_2darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, 
ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1d_v4f32_i32(100, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr 
[[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], <8 x i32> [[TMP2]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP3]] +// +half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_1darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_1darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// 
CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_3d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_3d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x 
i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_3d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_cube_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_cube_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_cube_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x 
half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_cube_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(100, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1d_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_1darray_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_1darray_2( +// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] 
to ptr +// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2d( +// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP4]] +// +float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) { + + return __builtin_amdgcn_image_load_mip_2d_f32_i32(100, i32, i32, i32, vec8i32, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2d_1( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = 
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP4]]
+//
+float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], <8 x i32> [[TMP3]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP4]]
+//
+half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(100, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local float @test_builtin_image_load_mip_2darray(
+// CHECK-SAME: float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret float [[TMP5]]
+//
+float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_2darray_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_2darray_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_3d_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_3d_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+
+// CHECK-LABEL: define dso_local <4 x float> @test_builtin_image_load_mip_cube_1(
+// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x float> [[TMP5]]
+//
+float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
+}
+// CHECK-LABEL: define dso_local <4 x half> @test_builtin_image_load_mip_cube_2(
+// CHECK-SAME: <4 x half> noundef [[V4F16:%.*]], i32 noundef [[I32:%.*]], <8 x i32> noundef [[VEC8I32:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[V4F16_ADDR:%.*]] = alloca <4 x half>, align 8, addrspace(5)
+// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[VEC8I32_ADDR:%.*]] = alloca <8 x i32>, align 32, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[V4F16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F16_ADDR]] to ptr
+// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr
+// CHECK-NEXT: [[VEC8I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC8I32_ADDR]] to ptr
+// CHECK-NEXT: store <4 x half> [[V4F16]], ptr [[V4F16_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store <8 x i32> [[VEC8I32]], ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I32_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[VEC8I32_ADDR_ASCAST]], align 32
+// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 100, i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], <8 x i32> [[TMP4]], i32 120, i32 110)
+// CHECK-NEXT: ret <4 x half> [[TMP5]]
+//
+half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
- return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, veci32);
+ return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, 110);
 }
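A note for reviewers skimming the generated checks above: each __builtin_amdgcn_image_load_mip_* builtin mirrors the operand order of the matching llvm.amdgcn.image.load.mip.* intrinsic -- a constant dmask first, then the i32 coordinate and mip-level operands, the <8 x i32> resource descriptor, and two trailing constant integers (the intrinsic's texfailctrl and cachepolicy operands). The 100/120/110 values are simply the arbitrary constants these tests pass. A minimal usage sketch, with hypothetical function and variable names that are not part of the patch:

    typedef int int8 __attribute__((ext_vector_type(8)));
    typedef float float4 __attribute__((ext_vector_type(4)));

    // Load all four channels (dmask = 15) of the texel at (x, y) from mip
    // level `lod` of a 2D image described by the <8 x i32> descriptor `rsrc`;
    // the two trailing zeros leave the texfail and cache-policy controls unset.
    float4 load_texel_mip2d(int x, int y, int lod, int8 rsrc) {
      return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(15, x, y, lod, rsrc, 0, 0);
    }
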
diff --git a/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl b/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl
new file mode 100644
index 0000000000000..7b5aab4011da9
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-image-load-2d-f32-param.cl
@@ -0,0 +1,132 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -S -verify=expected -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef int int8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+//typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+
+float test_builtin_image_load_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_f32_i32(i32, i32, i32, vec8i32, 106, 103); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2d_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f32_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_2darray_v4f16_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_2darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f32_i32(i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1d_v4f16_i32(100, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_1d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f32_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_1darray_v4f16_i32(100, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_1darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_3d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_3d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f32_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_cube_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_cube_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_1d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_1d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(100, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_1darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_1darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(100, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_1darray_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_mip_2d(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_f32_i32(i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_mip_2d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(100, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_2d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2d_v4f16_i32' must be a constant integer}}
+}
+
+float test_builtin_image_load_mip_2darray(float f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_f32_i32' must be a constant integer}}
+}
+float4 test_builtin_image_load_mip_2darray_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_2darray_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_2darray_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_3d_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_3d_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(i32, i32, i32, i32, i32, vec8i32, i32, 110); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_3d_v4f16_i32' must be a constant integer}}
+}
+
+float4 test_builtin_image_load_mip_cube_1(float4 v4f32, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(i32, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f32_i32' must be a constant integer}}
+}
+half4 test_builtin_image_load_mip_cube_2(half4 v4f16, int i32, int8 vec8i32) {
+
+ return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(100, i32, i32, i32, i32, vec8i32, 120, i32); //expected-error{{argument to '__builtin_amdgcn_image_load_mip_cube_v4f16_i32' must be a constant integer}}
+}
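
One further note on the Sema coverage: as the expected-error lines show, the dmask and the two trailing control arguments are the positions required to be integer constant expressions, while the coordinate, mip-level, and descriptor arguments may be runtime values. A minimal illustration, reusing the typedefs from the test above (the function names here are hypothetical, not part of the patch):

    float4 load_ok(int x, int y, int8 rsrc) {
      // Accepted: 15, 0, and 0 are integer constant expressions.
      return __builtin_amdgcn_image_load_2d_v4f32_i32(15, x, y, rsrc, 0, 0);
    }

    float4 load_bad(int x, int y, int dmask, int8 rsrc) {
      // Rejected: argument to '__builtin_amdgcn_image_load_2d_v4f32_i32'
      // must be a constant integer, because dmask is a runtime value here.
      return __builtin_amdgcn_image_load_2d_v4f32_i32(dmask, x, y, rsrc, 0, 0);
    }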