[clang] [CIR][AMDGPU] Add support for AMDGCN ldexp builtins (PR #198087)

Ayokunle Amodu via cfe-commits Sat, 16 May 2026 07:26:10 -0700

https://github.com/ayokunle321 created 
https://github.com/llvm/llvm-project/pull/198087


Adds support for the following AMDGPU ldexp builtins:

- __builtin_amdgcn_ldexp (double)
- __builtin_amdgcn_ldexpf (float)
- __builtin_amdgcn_ldexph (half)

These are lowered to the `llvm.ldexp` intrinsic. 

>From 0aefe8fee051c718ca666193a4bebd01ba3616d1 Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Sat, 16 May 2026 09:23:03 -0400
Subject: [PATCH] add ldexp builtin

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp | 22 ++++--
 .../CIR/CodeGenHIP/builtins-amdgcn-vi.hip     | 67 +++++++++++++++++++
 clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 16 +++++
 3 files changed, 100 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
index 04ab1c29b0d63..67ca5e730a17c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
@@ -284,12 +284,24 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
     return mlir::Value{};
   }
   case AMDGPU::BI__builtin_amdgcn_ldexp:
-  case AMDGPU::BI__builtin_amdgcn_ldexpf:
+  case AMDGPU::BI__builtin_amdgcn_ldexpf: {
+    mlir::Value src0 = emitScalarExpr(expr->getArg(0));
+    mlir::Value src1 = emitScalarExpr(expr->getArg(1));
+    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()), "ldexp",
+                                       src0.getType(),
+                                       mlir::ValueRange{src0, src1});
+  }
   case AMDGPU::BI__builtin_amdgcn_ldexph: {
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented AMDGPU builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
+    // The raw instruction has a different behavior for out of bounds exponent
+    // values (implicit truncation instead of saturate to short_min/short_max).
+    mlir::Value src0 = emitScalarExpr(expr->getArg(0));
+    mlir::Value src1 = emitScalarExpr(expr->getArg(1));
+    mlir::Value truncSrc1 = cir::CastOp::create(
+        builder, getLoc(expr->getExprLoc()), builder.getSInt16Ty(),
+        cir::CastKind::integral, src1);
+    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()), "ldexp",
+                                       src0.getType(),
+                                       mlir::ValueRange{src0, truncSrc1});
   }
   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
   case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
new file mode 100644
index 0000000000000..dcde886c205dc
--- /dev/null
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip
@@ -0,0 +1,67 @@
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z14test_ldexp_f16PDF16_DF16_i
+// CIR: [[TRUNC:%.*]] = cir.cast integral {{.*}} : !s32i -> !s16i
+// CIR: cir.call_llvm_intrinsic "ldexp" {{.*}}, [[TRUNC]] : (!cir.f16, !s16i) 
-> !cir.f16
+// LLVM: define{{.*}} void @_Z14test_ldexp_f16PDF16_DF16_i
+// LLVM: [[TRUNC:%.*]] = trunc i32 {{.*}} to i16
+// LLVM: call{{.*}} half @llvm.ldexp.f16.i16(half %{{.*}}, i16 [[TRUNC]])
+__device__ void test_ldexp_f16(_Float16* out, _Float16 a, int b) {
+    *out = __builtin_amdgcn_ldexph(a, b);
+}
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
index 4a61fde7aa90c..28c9fcb6963bb 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
@@ -71,3 +71,19 @@ __device__ void test_div_fmas_f64(double* out, double a, 
double b, double c, int
 __device__ void test_ds_swizzle(int* out, int a) {
   *out = __builtin_amdgcn_ds_swizzle(a, 32);
 }
+
+// CIR-LABEL: @_Z14test_ldexp_f32Pffi
+// CIR: cir.call_llvm_intrinsic "ldexp" {{.*}} : (!cir.float, !s32i) -> 
!cir.float
+// LLVM: define{{.*}} void @_Z14test_ldexp_f32Pffi
+// LLVM: call{{.*}} float @llvm.ldexp.f32.i32(float %{{.*}}, i32 %{{.*}})
+__device__ void test_ldexp_f32(float* out, float a, int b) {
+  *out = __builtin_amdgcn_ldexpf(a, b);
+}
+
+// CIR-LABEL: @_Z14test_ldexp_f64Pddi
+// CIR: cir.call_llvm_intrinsic "ldexp" {{.*}} : (!cir.double, !s32i) -> 
!cir.double
+// LLVM: define{{.*}} void @_Z14test_ldexp_f64Pddi
+// LLVM: call{{.*}} double @llvm.ldexp.f64.i32(double %{{.*}}, i32 %{{.*}})
+__device__ void test_ldexp_f64(double* out, double a, int b) {
+  *out = __builtin_amdgcn_ldexp(a, b);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR][AMDGPU] Add support for AMDGCN ldexp builtins (PR #198087)

Reply via email to