[clang] [CIR][AMDGPU] Adds lowering for amdgcn image load/store builtins (PR #198184)

Rana Pratap Reddy via cfe-commits Mon, 25 May 2026 12:39:37 -0700

https://github.com/ranapratap55 updated 
https://github.com/llvm/llvm-project/pull/198184


>From b0a5ba5fb9175a9629ef18e3c5f46914f84e6fd5 Mon Sep 17 00:00:00 2001
From: ranapratap55 <[email protected]>
Date: Sun, 17 May 2026 21:05:41 +0530
Subject: [PATCH 1/2] [CIR][AMDGPU] Adds lowering for amdgcn image load/store
 builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp | 118 ++++-
 clang/lib/CIR/CodeGen/CIRGenTypes.cpp         |  17 +
 .../CIR/CodeGenHIP/builtins-amdgcn-image.hip  | 466 ++++++++++++++++++
 3 files changed, 589 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-image.hip

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
index 7e6e0f1a06046..10f111d1f3cfd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
@@ -91,6 +91,59 @@ static mlir::Value emitLogbBuiltin(CIRGenFunction &cgf, 
const CallExpr *e,
   return res;
 }
 
+static mlir::Value
+emitAMDGCNImageOverloadedReturnType(CIRGenFunction &cgf, const CallExpr *e,
+                                    llvm::StringRef intrinsicName,
+                                    bool isImageStore) {
+  auto &builder = cgf.getBuilder();
+
+  auto findTextureDescIndex = [&cgf](const CallExpr *e) -> unsigned {
+    QualType texQT = cgf.getContext().AMDGPUTextureTy;
+    for (unsigned i = 0, n = e->getNumArgs(); i < n; ++i) {
+      QualType argTy = e->getArg(i)->getType();
+      if (argTy == texQT ||
+          argTy.getCanonicalType() == texQT.getCanonicalType()) {
+        return i;
+      }
+    }
+    return ~0U;
+  };
+
+  unsigned rsrcIndex = findTextureDescIndex(e);
+  if (rsrcIndex == ~0U) {
+    llvm::report_fatal_error("Invalid argument count for image builtin");
+  }
+
+  cir::VectorType vec8I32Ty = cir::VectorType::get(builder.getSInt32Ty(), 8);
+
+  llvm::SmallVector<mlir::Value, 10> args;
+  for (unsigned i = 0, n = e->getNumArgs(); i < n; ++i) {
+    mlir::Value v = cgf.emitScalarExpr(e->getArg(i));
+
+    if (i == rsrcIndex) {
+      mlir::Type vTy = v.getType();
+      if (mlir::isa<cir::PointerType>(vTy)) {
+        v = builder.createAlignedLoad(cgf.getLoc(e->getExprLoc()), vec8I32Ty, 
v,
+                                      CharUnits::fromQuantity(32));
+      }
+    }
+    args.push_back(v);
+  }
+
+  mlir::Type retTy;
+  if (isImageStore) {
+    retTy = cir::VoidType::get(builder.getContext());
+  } else {
+    retTy = cgf.convertType(e->getType());
+  }
+
+  auto callOp = cir::LLVMIntrinsicCallOp::create(
+      builder, cgf.getLoc(e->getExprLoc()),
+      builder.getStringAttr(intrinsicName), retTy, args);
+
+  return callOp.getResult();
+}
+
 std::optional<mlir::Value>
 CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
                                       const CallExpr *expr) {
@@ -523,67 +576,108 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
   }
   case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.load.1d", false);
   case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.load.1darray", false);
   case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.load.2d", false);
   case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.load.2darray", false);
   case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.load.3d", false);
   case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.load.cube", 
false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.load.mip.1d", false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.load.mip.1darray", false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.load.mip.2d", false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.load.mip.2darray", false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.load.mip.3d", false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
-  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented AMDGPU builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
-  }
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.load.mip.cube", false);
   case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.store.1d", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.store.1darray", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.store.2d", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.store.2darray", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.store.3d", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.store.cube", 
true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.store.mip.1d", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.store.mip.1darray", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.store.mip.2d", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.store.mip.2darray", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.store.mip.3d", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
-  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented AMDGPU builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
-  }
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(
+        *this, expr, "amdgcn.image.store.mip.cube", true);
   case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp 
b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index 85b7e854abb7f..61e2365bd5255 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -486,6 +486,23 @@ mlir::Type CIRGenTypes::convertType(QualType type) {
       resultType = builder.getVoidPtrTy();
       break;
 
+#define AMDGPU_OPAQUE_PTR_TYPE(Name, Id, SingletonId, Width, Align, AS)        
\
+  case BuiltinType::Id: {                                                      
\
+    if (BuiltinType::Id == BuiltinType::AMDGPUTexture) {                       
\
+      resultType = cir::VectorType::get(builder.getSInt32Ty(), 8);             
\
+    } else {                                                                   
\
+      resultType = builder.getPointerTo(cgm.voidTy);                           
\
+    }                                                                          
\
+    break;                                                                     
\
+  }
+#define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope)  
\
+  case BuiltinType::Id:                                                        
\
+    llvm_unreachable("NYI");
+#define AMDGPU_TYPE(Name, Id, SingletonId, Width, Align)                       
\
+  case BuiltinType::Id:                                                        
\
+    llvm_unreachable("NYI");
+#include "clang/Basic/AMDGPUTypes.def"
+
     default:
       cgm.errorNYI(SourceLocation(), "processing of built-in type", type);
       resultType = cgm.sInt32Ty;
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-image.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-image.hip
new file mode 100644
index 0000000000000..6f90d9efee50d
--- /dev/null
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-image.hip
@@ -0,0 +1,466 @@
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1100 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1100 -fcuda-is-device -emit-llvm %s -o 
%t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1100 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef _Float16 half;
+typedef half half4 __attribute__((ext_vector_type(4)));
+
+// CIR-LABEL: @_Z24test_image_load_1d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.1d" {{.*}} : (!s32i, !s32i, 
!cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> 
@_Z24test_image_load_1d_v4f32iu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32.v8i32(i32 
{{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_image_load_1d_v4f32(int x, __amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_1d_v4f32_i32(15, x, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z24test_image_load_1d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.1d" {{.*}} : (!s32i, !s32i, 
!cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> 
@_Z24test_image_load_1d_v4f16iu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32.v8i32(i32 
{{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_image_load_1d_v4f16(int x, __amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_1d_v4f16_i32(15, x, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z25test_image_store_1d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.1d" {{.*}} : 
(!cir.vector<4 x !cir.float>, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, 
!s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z25test_image_store_1d_v4f32Dv4_fiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.1d.v4f32.i32.v8i32(<4 x float> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_1d_v4f32(float4 val, int x, 
__amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_1d_v4f32_i32(val, 15, x, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z25test_image_store_1d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.1d" {{.*}} : 
(!cir.vector<4 x !cir.f16>, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) 
-> !void
+// LLVM: define{{.*}} void 
@_Z25test_image_store_1d_v4f16Dv4_DF16_iu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.1d.v4f16.i32.v8i32(<4 x half> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_1d_v4f16(half4 val, int x, __amdgpu_texture_t 
rsrc) {
+  __builtin_amdgcn_image_store_1d_v4f16_i32(val, 15, x, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z29test_image_load_1darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.1darray" {{.*}} : (!s32i, 
!s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x 
!cir.float>
+// LLVM: define{{.*}} <4 x float> 
@_Z29test_image_load_1darray_v4f32iiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> 
@llvm.amdgcn.image.load.1darray.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_image_load_1darray_v4f32(int x, int slice, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_1darray_v4f32_i32(15, x, slice, rsrc, 
120, 110);
+}
+
+// CIR-LABEL: @_Z29test_image_load_1darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.1darray" {{.*}} : (!s32i, 
!s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> 
@_Z29test_image_load_1darray_v4f16iiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x half> 
@llvm.amdgcn.image.load.1darray.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_image_load_1darray_v4f16(int x, int slice, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_1darray_v4f16_i32(15, x, slice, rsrc, 
120, 110);
+}
+
+// CIR-LABEL: @_Z30test_image_store_1darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.1darray" {{.*}} : 
(!cir.vector<4 x !cir.float>, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, 
!s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z30test_image_store_1darray_v4f32Dv4_fiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.1darray.v4f32.i32.v8i32(<4 x 
float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 
{{.*}}, i32 {{.*}})
+__device__ void test_image_store_1darray_v4f32(float4 val, int x, int slice, 
__amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_1darray_v4f32_i32(val, 15, x, slice, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z30test_image_store_1darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.1darray" {{.*}} : 
(!cir.vector<4 x !cir.f16>, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, 
!s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z30test_image_store_1darray_v4f16Dv4_DF16_iiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.1darray.v4f16.i32.v8i32(<4 x half> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 
{{.*}})
+__device__ void test_image_store_1darray_v4f16(half4 val, int x, int slice, 
__amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_1darray_v4f16_i32(val, 15, x, slice, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z22test_image_load_2d_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.2d" {{.*}} : (!s32i, !s32i, 
!s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} float @_Z22test_image_load_2d_f32iiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 {{.*}}, 
i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float test_image_load_2d_f32(int x, int y, __amdgpu_texture_t rsrc) 
{
+  return __builtin_amdgcn_image_load_2d_f32_i32(2, x, y, rsrc, 106, 103);
+}
+
+// CIR-LABEL: @_Z24test_image_load_2d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.2d" {{.*}} : (!s32i, !s32i, 
!s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> 
@_Z24test_image_load_2d_v4f32iiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_image_load_2d_v4f32(int x, int y, __amdgpu_texture_t 
rsrc) {
+  return __builtin_amdgcn_image_load_2d_v4f32_i32(15, x, y, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z24test_image_load_2d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.2d" {{.*}} : (!s32i, !s32i, 
!s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> 
@_Z24test_image_load_2d_v4f16iiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_image_load_2d_v4f16(int x, int y, __amdgpu_texture_t 
rsrc) {
+  return __builtin_amdgcn_image_load_2d_v4f16_i32(15, x, y, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z23test_image_store_2d_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.2d" {{.*}} : (!cir.float, 
!s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z23test_image_store_2d_f32fiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float {{.*}}, i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_2d_f32(float val, int x, int y, 
__amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2d_f32_i32(val, 12, x, y, rsrc, 106, 103);
+}
+
+// CIR-LABEL: @_Z25test_image_store_2d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.2d" {{.*}} : 
(!cir.vector<4 x !cir.float>, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, 
!s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z25test_image_store_2d_v4f32Dv4_fiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 
{{.*}})
+__device__ void test_image_store_2d_v4f32(float4 val, int x, int y, 
__amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2d_v4f32_i32(val, 15, x, y, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z25test_image_store_2d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.2d" {{.*}} : 
(!cir.vector<4 x !cir.f16>, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, 
!s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z25test_image_store_2d_v4f16Dv4_DF16_iiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 
{{.*}})
+__device__ void test_image_store_2d_v4f16(half4 val, int x, int y, 
__amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2d_v4f16_i32(val, 15, x, y, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z27test_image_load_2darray_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.2darray" {{.*}} : (!s32i, 
!s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} float 
@_Z27test_image_load_2darray_f32iiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 
{{.*}})
+__device__ float test_image_load_2darray_f32(int x, int y, int slice, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_2darray_f32_i32(4, x, y, slice, rsrc, 
120, 110);
+}
+
+// CIR-LABEL: @_Z29test_image_load_2darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.2darray" {{.*}} : (!s32i, 
!s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x 
!cir.float>
+// LLVM: define{{.*}} <4 x float> 
@_Z29test_image_load_2darray_v4f32iiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> 
@llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_image_load_2darray_v4f32(int x, int y, int slice, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_2darray_v4f32_i32(15, x, y, slice, rsrc, 
120, 110);
+}
+
+// CIR-LABEL: @_Z29test_image_load_2darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.2darray" {{.*}} : (!s32i, 
!s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x 
!cir.f16>
+// LLVM: define{{.*}} <4 x half> 
@_Z29test_image_load_2darray_v4f16iiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x half> 
@llvm.amdgcn.image.load.2darray.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_image_load_2darray_v4f16(int x, int y, int slice, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_2darray_v4f16_i32(15, x, y, slice, rsrc, 
120, 110);
+}
+
+// CIR-LABEL: @_Z28test_image_store_2darray_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.2darray" {{.*}} : 
(!cir.float, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) 
-> !void
+// LLVM: define{{.*}} void 
@_Z28test_image_store_2darray_f32fiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 
{{.*}}, i32 {{.*}})
+__device__ void test_image_store_2darray_f32(float val, int x, int y, int 
slice, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2darray_f32_i32(val, 12, x, y, slice, rsrc, 
106, 103);
+}
+
+// CIR-LABEL: @_Z30test_image_store_2darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.2darray" {{.*}} : 
(!cir.vector<4 x !cir.float>, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x 
!s32i>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z30test_image_store_2darray_v4f32Dv4_fiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x 
float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> 
{{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_2darray_v4f32(float4 val, int x, int y, int 
slice, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2darray_v4f32_i32(val, 15, x, y, slice, rsrc, 
120, 110);
+}
+
+// CIR-LABEL: @_Z30test_image_store_2darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.2darray" {{.*}} : 
(!cir.vector<4 x !cir.f16>, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, 
!s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z30test_image_store_2darray_v4f16Dv4_DF16_iiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.2darray.v4f16.i32.v8i32(<4 x half> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 
{{.*}}, i32 {{.*}})
+__device__ void test_image_store_2darray_v4f16(half4 val, int x, int y, int 
slice, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_2darray_v4f16_i32(val, 15, x, y, slice, rsrc, 
120, 110);
+}
+
+// CIR-LABEL: @_Z24test_image_load_3d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.3d" {{.*}} : (!s32i, !s32i, 
!s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x 
!cir.float>
+// LLVM: define{{.*}} <4 x float> 
@_Z24test_image_load_3d_v4f32iiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32.v8i32(i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 
{{.*}})
+__device__ float4 test_image_load_3d_v4f32(int x, int y, int z, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_3d_v4f32_i32(15, x, y, z, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z24test_image_load_3d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.3d" {{.*}} : (!s32i, !s32i, 
!s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> 
@_Z24test_image_load_3d_v4f16iiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32.v8i32(i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 
{{.*}})
+__device__ half4 test_image_load_3d_v4f16(int x, int y, int z, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_3d_v4f16_i32(15, x, y, z, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z25test_image_store_3d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.3d" {{.*}} : 
(!cir.vector<4 x !cir.float>, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x 
!s32i>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z25test_image_store_3d_v4f32Dv4_fiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.3d.v4f32.i32.v8i32(<4 x float> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 
{{.*}}, i32 {{.*}})
+__device__ void test_image_store_3d_v4f32(float4 val, int x, int y, int z, 
__amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_3d_v4f32_i32(val, 15, x, y, z, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z25test_image_store_3d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.3d" {{.*}} : 
(!cir.vector<4 x !cir.f16>, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, 
!s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z25test_image_store_3d_v4f16Dv4_DF16_iiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.3d.v4f16.i32.v8i32(<4 x half> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 
{{.*}}, i32 {{.*}})
+__device__ void test_image_store_3d_v4f16(half4 val, int x, int y, int z, 
__amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_3d_v4f16_i32(val, 15, x, y, z, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z26test_image_load_cube_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.cube" {{.*}} : (!s32i, 
!s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x 
!cir.float>
+// LLVM: define{{.*}} <4 x float> 
@_Z26test_image_load_cube_v4f32iiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> 
@llvm.amdgcn.image.load.cube.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_image_load_cube_v4f32(int x, int y, int face, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_cube_v4f32_i32(15, x, y, face, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z26test_image_load_cube_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.cube" {{.*}} : (!s32i, 
!s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x 
!cir.f16>
+// LLVM: define{{.*}} <4 x half> 
@_Z26test_image_load_cube_v4f16iiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x half> 
@llvm.amdgcn.image.load.cube.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_image_load_cube_v4f16(int x, int y, int face, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_cube_v4f16_i32(15, x, y, face, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z27test_image_store_cube_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.cube" {{.*}} : 
(!cir.vector<4 x !cir.float>, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x 
!s32i>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z27test_image_store_cube_v4f32Dv4_fiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.cube.v4f32.i32.v8i32(<4 x float> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 
{{.*}}, i32 {{.*}})
+__device__ void test_image_store_cube_v4f32(float4 val, int x, int y, int 
face, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_cube_v4f32_i32(val, 15, x, y, face, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z27test_image_store_cube_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.cube" {{.*}} : 
(!cir.vector<4 x !cir.f16>, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, 
!s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z27test_image_store_cube_v4f16Dv4_DF16_iiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.cube.v4f16.i32.v8i32(<4 x half> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 
{{.*}}, i32 {{.*}})
+__device__ void test_image_store_cube_v4f16(half4 val, int x, int y, int face, 
__amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_cube_v4f16_i32(val, 15, x, y, face, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z28test_image_load_mip_1d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.1d" {{.*}} : (!s32i, 
!s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x 
!cir.float>
+// LLVM: define{{.*}} <4 x float> 
@_Z28test_image_load_mip_1d_v4f32iiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> 
@llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_image_load_mip_1d_v4f32(int x, int mip, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_1d_v4f32_i32(15, x, mip, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z28test_image_load_mip_1d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.1d" {{.*}} : (!s32i, 
!s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> 
@_Z28test_image_load_mip_1d_v4f16iiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x half> 
@llvm.amdgcn.image.load.mip.1d.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_image_load_mip_1d_v4f16(int x, int mip, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_1d_v4f16_i32(15, x, mip, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z29test_image_store_mip_1d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.1d" {{.*}} : 
(!cir.vector<4 x !cir.float>, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, 
!s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z29test_image_store_mip_1d_v4f32Dv4_fiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32.v8i32(<4 x float> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 
{{.*}})
+__device__ void test_image_store_mip_1d_v4f32(float4 val, int x, int mip, 
__amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_1d_v4f32_i32(val, 15, x, mip, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z29test_image_store_mip_1d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.1d" {{.*}} : 
(!cir.vector<4 x !cir.f16>, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, 
!s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z29test_image_store_mip_1d_v4f16Dv4_DF16_iiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32.v8i32(<4 x half> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 
{{.*}})
+__device__ void test_image_store_mip_1d_v4f16(half4 val, int x, int mip, 
__amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_1d_v4f16_i32(val, 15, x, mip, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z33test_image_load_mip_1darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.1darray" {{.*}} : 
(!s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> 
!cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> 
@_Z33test_image_load_mip_1darray_v4f32iiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> 
@llvm.amdgcn.image.load.mip.1darray.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_image_load_mip_1darray_v4f32(int x, int slice, int mip, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_1darray_v4f32_i32(15, x, slice, mip, 
rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z33test_image_load_mip_1darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.1darray" {{.*}} : 
(!s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> 
!cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> 
@_Z33test_image_load_mip_1darray_v4f16iiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x half> 
@llvm.amdgcn.image.load.mip.1darray.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_image_load_mip_1darray_v4f16(int x, int slice, int mip, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_1darray_v4f16_i32(15, x, slice, mip, 
rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z34test_image_store_mip_1darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.1darray" {{.*}} : 
(!cir.vector<4 x !cir.float>, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x 
!s32i>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z34test_image_store_mip_1darray_v4f32Dv4_fiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32.v8i32(<4 x 
float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> 
{{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_mip_1darray_v4f32(float4 val, int x, int 
slice, int mip, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_1darray_v4f32_i32(val, 15, x, slice, mip, 
rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z34test_image_store_mip_1darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.1darray" {{.*}} : 
(!cir.vector<4 x !cir.f16>, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, 
!s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z34test_image_store_mip_1darray_v4f16Dv4_DF16_iiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32.v8i32(<4 x 
half> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, 
i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_mip_1darray_v4f16(half4 val, int x, int 
slice, int mip, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_1darray_v4f16_i32(val, 15, x, slice, mip, 
rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z26test_image_load_mip_2d_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.2d" {{.*}} : (!s32i, 
!s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.float
+// LLVM: define{{.*}} float 
@_Z26test_image_load_mip_2d_f32iiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}float @llvm.amdgcn.image.load.mip.2d.f32.i32.v8i32(i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 
{{.*}})
+__device__ float test_image_load_mip_2d_f32(int x, int y, int mip, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_2d_f32_i32(8, x, y, mip, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z28test_image_load_mip_2d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.2d" {{.*}} : (!s32i, 
!s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x 
!cir.float>
+// LLVM: define{{.*}} <4 x float> 
@_Z28test_image_load_mip_2d_v4f32iiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> 
@llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_image_load_mip_2d_v4f32(int x, int y, int mip, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_2d_v4f32_i32(15, x, y, mip, rsrc, 
120, 110);
+}
+
+// CIR-LABEL: @_Z28test_image_load_mip_2d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.2d" {{.*}} : (!s32i, 
!s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> !cir.vector<4 x 
!cir.f16>
+// LLVM: define{{.*}} <4 x half> 
@_Z28test_image_load_mip_2d_v4f16iiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x half> 
@llvm.amdgcn.image.load.mip.2d.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_image_load_mip_2d_v4f16(int x, int y, int mip, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_2d_v4f16_i32(15, x, y, mip, rsrc, 
120, 110);
+}
+
+// CIR-LABEL: @_Z27test_image_store_mip_2d_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.2d" {{.*}} : 
(!cir.float, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) 
-> !void
+// LLVM: define{{.*}} void 
@_Z27test_image_store_mip_2d_f32fiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.2d.f32.i32.v8i32(float {{.*}}, 
i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, 
i32 {{.*}})
+__device__ void test_image_store_mip_2d_f32(float val, int x, int y, int mip, 
__amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_2d_f32_i32(val, 15, x, y, mip, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z29test_image_store_mip_2d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.2d" {{.*}} : 
(!cir.vector<4 x !cir.float>, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x 
!s32i>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z29test_image_store_mip_2d_v4f32Dv4_fiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32.v8i32(<4 x float> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 
{{.*}}, i32 {{.*}})
+__device__ void test_image_store_mip_2d_v4f32(float4 val, int x, int y, int 
mip, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_2d_v4f32_i32(val, 15, x, y, mip, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z29test_image_store_mip_2d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.2d" {{.*}} : 
(!cir.vector<4 x !cir.f16>, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, 
!s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z29test_image_store_mip_2d_v4f16Dv4_DF16_iiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.2d.v4f16.i32.v8i32(<4 x half> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 
{{.*}}, i32 {{.*}})
+__device__ void test_image_store_mip_2d_v4f16(half4 val, int x, int y, int 
mip, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_2d_v4f16_i32(val, 15, x, y, mip, rsrc, 120, 
110);
+}
+
+// CIR-LABEL: @_Z31test_image_load_mip_2darray_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.2darray" {{.*}} : 
(!s32i, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> 
!cir.float
+// LLVM: define{{.*}} float 
@_Z31test_image_load_mip_2darray_f32iiiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}float 
@llvm.amdgcn.image.load.mip.2darray.f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float test_image_load_mip_2darray_f32(int x, int y, int slice, int 
mip, __amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_2darray_f32_i32(8, x, y, slice, mip, 
rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z33test_image_load_mip_2darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.2darray" {{.*}} : 
(!s32i, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> 
!cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> 
@_Z33test_image_load_mip_2darray_v4f32iiiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> 
@llvm.amdgcn.image.load.mip.2darray.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_image_load_mip_2darray_v4f32(int x, int y, int slice, 
int mip, __amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_2darray_v4f32_i32(15, x, y, slice, 
mip, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z33test_image_load_mip_2darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.2darray" {{.*}} : 
(!s32i, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> 
!cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> 
@_Z33test_image_load_mip_2darray_v4f16iiiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x half> 
@llvm.amdgcn.image.load.mip.2darray.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_image_load_mip_2darray_v4f16(int x, int y, int slice, 
int mip, __amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_2darray_v4f16_i32(15, x, y, slice, 
mip, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z32test_image_store_mip_2darray_f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.2darray" {{.*}} : 
(!cir.float, !s32i, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, 
!s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z32test_image_store_mip_2darray_f32fiiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.2darray.f32.i32.v8i32(float 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> 
{{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_mip_2darray_f32(float val, int x, int y, int 
slice, int mip, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_2darray_f32_i32(val, 15, x, y, slice, mip, 
rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z34test_image_store_mip_2darray_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.2darray" {{.*}} : 
(!cir.vector<4 x !cir.float>, !s32i, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 
x !s32i>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z34test_image_store_mip_2darray_v4f32Dv4_fiiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32.v8i32(<4 x 
float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x 
i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_mip_2darray_v4f32(float4 val, int x, int y, 
int slice, int mip, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_2darray_v4f32_i32(val, 15, x, y, slice, 
mip, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z34test_image_store_mip_2darray_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.2darray" {{.*}} : 
(!cir.vector<4 x !cir.f16>, !s32i, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x 
!s32i>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z34test_image_store_mip_2darray_v4f16Dv4_DF16_iiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32.v8i32(<4 x 
half> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x 
i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_mip_2darray_v4f16(half4 val, int x, int y, 
int slice, int mip, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_2darray_v4f16_i32(val, 15, x, y, slice, 
mip, rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z28test_image_load_mip_3d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.3d" {{.*}} : (!s32i, 
!s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> 
!cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> 
@_Z28test_image_load_mip_3d_v4f32iiiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> 
@llvm.amdgcn.image.load.mip.3d.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_image_load_mip_3d_v4f32(int x, int y, int z, int mip, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_3d_v4f32_i32(15, x, y, z, mip, rsrc, 
120, 110);
+}
+
+// CIR-LABEL: @_Z28test_image_load_mip_3d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.3d" {{.*}} : (!s32i, 
!s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> 
!cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> 
@_Z28test_image_load_mip_3d_v4f16iiiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x half> 
@llvm.amdgcn.image.load.mip.3d.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_image_load_mip_3d_v4f16(int x, int y, int z, int mip, 
__amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_3d_v4f16_i32(15, x, y, z, mip, rsrc, 
120, 110);
+}
+
+// CIR-LABEL: @_Z29test_image_store_mip_3d_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.3d" {{.*}} : 
(!cir.vector<4 x !cir.float>, !s32i, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 
x !s32i>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z29test_image_store_mip_3d_v4f32Dv4_fiiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32.v8i32(<4 x float> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> 
{{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_mip_3d_v4f32(float4 val, int x, int y, int z, 
int mip, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_3d_v4f32_i32(val, 15, x, y, z, mip, rsrc, 
120, 110);
+}
+
+// CIR-LABEL: @_Z29test_image_store_mip_3d_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.3d" {{.*}} : 
(!cir.vector<4 x !cir.f16>, !s32i, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x 
!s32i>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z29test_image_store_mip_3d_v4f16Dv4_DF16_iiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.3d.v4f16.i32.v8i32(<4 x half> 
{{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> 
{{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_mip_3d_v4f16(half4 val, int x, int y, int z, 
int mip, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_3d_v4f16_i32(val, 15, x, y, z, mip, rsrc, 
120, 110);
+}
+
+// CIR-LABEL: @_Z30test_image_load_mip_cube_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.cube" {{.*}} : (!s32i, 
!s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> 
!cir.vector<4 x !cir.float>
+// LLVM: define{{.*}} <4 x float> 
@_Z30test_image_load_mip_cube_v4f32iiiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x float> 
@llvm.amdgcn.image.load.mip.cube.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ float4 test_image_load_mip_cube_v4f32(int x, int y, int face, int 
mip, __amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_cube_v4f32_i32(15, x, y, face, mip, 
rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z30test_image_load_mip_cube_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.load.mip.cube" {{.*}} : (!s32i, 
!s32i, !s32i, !s32i, !s32i, !cir.vector<8 x !s32i>, !s32i, !s32i) -> 
!cir.vector<4 x !cir.f16>
+// LLVM: define{{.*}} <4 x half> 
@_Z30test_image_load_mip_cube_v4f16iiiiu18__amdgpu_texture_t(
+// LLVM: call {{.*}}<4 x half> 
@llvm.amdgcn.image.load.mip.cube.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 
{{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ half4 test_image_load_mip_cube_v4f16(int x, int y, int face, int 
mip, __amdgpu_texture_t rsrc) {
+  return __builtin_amdgcn_image_load_mip_cube_v4f16_i32(15, x, y, face, mip, 
rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z31test_image_store_mip_cube_v4f32
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.cube" {{.*}} : 
(!cir.vector<4 x !cir.float>, !s32i, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 
x !s32i>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z31test_image_store_mip_cube_v4f32Dv4_fiiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32.v8i32(<4 x 
float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x 
i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_mip_cube_v4f32(float4 val, int x, int y, int 
face, int mip, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_cube_v4f32_i32(val, 15, x, y, face, mip, 
rsrc, 120, 110);
+}
+
+// CIR-LABEL: @_Z31test_image_store_mip_cube_v4f16
+// CIR: cir.call_llvm_intrinsic "amdgcn.image.store.mip.cube" {{.*}} : 
(!cir.vector<4 x !cir.f16>, !s32i, !s32i, !s32i, !s32i, !s32i, !cir.vector<8 x 
!s32i>, !s32i, !s32i) -> !void
+// LLVM: define{{.*}} void 
@_Z31test_image_store_mip_cube_v4f16Dv4_DF16_iiiiu18__amdgpu_texture_t(
+// LLVM: call void @llvm.amdgcn.image.store.mip.cube.v4f16.i32.v8i32(<4 x 
half> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x 
i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
+__device__ void test_image_store_mip_cube_v4f16(half4 val, int x, int y, int 
face, int mip, __amdgpu_texture_t rsrc) {
+  __builtin_amdgcn_image_store_mip_cube_v4f16_i32(val, 15, x, y, face, mip, 
rsrc, 120, 110);
+}

>From 47a026eeb1c9d725115488d982bf07f2b0951ede Mon Sep 17 00:00:00 2001
From: ranapratap55 <[email protected]>
Date: Mon, 25 May 2026 11:34:11 +0530
Subject: [PATCH 2/2] [CIR][AMDGPU] Clean up
 emitAMDGCNImageOverloadedReturnType helper

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp | 41 ++-----------------
 1 file changed, 4 insertions(+), 37 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
index 10f111d1f3cfd..9e38518d6d8c6 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
@@ -97,45 +97,12 @@ emitAMDGCNImageOverloadedReturnType(CIRGenFunction &cgf, 
const CallExpr *e,
                                     bool isImageStore) {
   auto &builder = cgf.getBuilder();
 
-  auto findTextureDescIndex = [&cgf](const CallExpr *e) -> unsigned {
-    QualType texQT = cgf.getContext().AMDGPUTextureTy;
-    for (unsigned i = 0, n = e->getNumArgs(); i < n; ++i) {
-      QualType argTy = e->getArg(i)->getType();
-      if (argTy == texQT ||
-          argTy.getCanonicalType() == texQT.getCanonicalType()) {
-        return i;
-      }
-    }
-    return ~0U;
-  };
-
-  unsigned rsrcIndex = findTextureDescIndex(e);
-  if (rsrcIndex == ~0U) {
-    llvm::report_fatal_error("Invalid argument count for image builtin");
-  }
-
-  cir::VectorType vec8I32Ty = cir::VectorType::get(builder.getSInt32Ty(), 8);
-
   llvm::SmallVector<mlir::Value, 10> args;
-  for (unsigned i = 0, n = e->getNumArgs(); i < n; ++i) {
-    mlir::Value v = cgf.emitScalarExpr(e->getArg(i));
+  for (unsigned i = 0, n = e->getNumArgs(); i < n; ++i)
+    args.push_back(cgf.emitScalarExpr(e->getArg(i)));
 
-    if (i == rsrcIndex) {
-      mlir::Type vTy = v.getType();
-      if (mlir::isa<cir::PointerType>(vTy)) {
-        v = builder.createAlignedLoad(cgf.getLoc(e->getExprLoc()), vec8I32Ty, 
v,
-                                      CharUnits::fromQuantity(32));
-      }
-    }
-    args.push_back(v);
-  }
-
-  mlir::Type retTy;
-  if (isImageStore) {
-    retTy = cir::VoidType::get(builder.getContext());
-  } else {
-    retTy = cgf.convertType(e->getType());
-  }
+  mlir::Type retTy = isImageStore ? cir::VoidType::get(builder.getContext())
+                                  : cgf.convertType(e->getType());
 
   auto callOp = cir::LLVMIntrinsicCallOp::create(
       builder, cgf.getLoc(e->getExprLoc()),

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR][AMDGPU] Adds lowering for amdgcn image load/store builtins (PR #198184)

Reply via email to