https://github.com/ayokunle321 created https://github.com/llvm/llvm-project/pull/198087
Adds support for the following AMDGPU ldexp builtins: - __builtin_amdgcn_ldexp (double) - __builtin_amdgcn_ldexpf (float) - __builtin_amdgcn_ldexph (half) These are lowered to the `llvm.ldexp` intrinsic. >From 0aefe8fee051c718ca666193a4bebd01ba3616d1 Mon Sep 17 00:00:00 2001 From: Ayokunle Amodu <[email protected]> Date: Sat, 16 May 2026 09:23:03 -0400 Subject: [PATCH] add ldexp builtin --- clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp | 22 ++++-- .../CIR/CodeGenHIP/builtins-amdgcn-vi.hip | 67 +++++++++++++++++++ clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 16 +++++ 3 files changed, 100 insertions(+), 5 deletions(-) create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp index 04ab1c29b0d63..67ca5e730a17c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp @@ -284,12 +284,24 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId, return mlir::Value{}; } case AMDGPU::BI__builtin_amdgcn_ldexp: - case AMDGPU::BI__builtin_amdgcn_ldexpf: + case AMDGPU::BI__builtin_amdgcn_ldexpf: { + mlir::Value src0 = emitScalarExpr(expr->getArg(0)); + mlir::Value src1 = emitScalarExpr(expr->getArg(1)); + return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()), "ldexp", + src0.getType(), + mlir::ValueRange{src0, src1}); + } case AMDGPU::BI__builtin_amdgcn_ldexph: { - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented AMDGPU builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; + // The raw instruction has a different behavior for out of bounds exponent + // values (implicit truncation instead of saturate to short_min/short_max). + mlir::Value src0 = emitScalarExpr(expr->getArg(0)); + mlir::Value src1 = emitScalarExpr(expr->getArg(1)); + mlir::Value truncSrc1 = cir::CastOp::create( + builder, getLoc(expr->getExprLoc()), builder.getSInt16Ty(), + cir::CastKind::integral, src1); + return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()), "ldexp", + src0.getType(), + mlir::ValueRange{src0, truncSrc1}); } case AMDGPU::BI__builtin_amdgcn_frexp_mant: case AMDGPU::BI__builtin_amdgcn_frexp_mantf: diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip new file mode 100644 index 0000000000000..dcde886c205dc --- /dev/null +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-vi.hip @@ -0,0 +1,67 @@ +#include "../CodeGenCUDA/Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu tonga -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx900 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu tonga -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx900 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +// CIR-LABEL: @_Z14test_ldexp_f16PDF16_DF16_i +// CIR: [[TRUNC:%.*]] = cir.cast integral {{.*}} : !s32i -> !s16i +// CIR: cir.call_llvm_intrinsic "ldexp" {{.*}}, [[TRUNC]] : (!cir.f16, !s16i) -> !cir.f16 +// LLVM: define{{.*}} void @_Z14test_ldexp_f16PDF16_DF16_i +// LLVM: [[TRUNC:%.*]] = trunc i32 {{.*}} to i16 +// LLVM: call{{.*}} half @llvm.ldexp.f16.i16(half %{{.*}}, i16 [[TRUNC]]) +__device__ void test_ldexp_f16(_Float16* out, _Float16 a, int b) { + *out = __builtin_amdgcn_ldexph(a, b); +} diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip index 4a61fde7aa90c..28c9fcb6963bb 100644 --- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip +++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip @@ -71,3 +71,19 @@ __device__ void test_div_fmas_f64(double* out, double a, double b, double c, int __device__ void test_ds_swizzle(int* out, int a) { *out = __builtin_amdgcn_ds_swizzle(a, 32); } + +// CIR-LABEL: @_Z14test_ldexp_f32Pffi +// CIR: cir.call_llvm_intrinsic "ldexp" {{.*}} : (!cir.float, !s32i) -> !cir.float +// LLVM: define{{.*}} void @_Z14test_ldexp_f32Pffi +// LLVM: call{{.*}} float @llvm.ldexp.f32.i32(float %{{.*}}, i32 %{{.*}}) +__device__ void test_ldexp_f32(float* out, float a, int b) { + *out = __builtin_amdgcn_ldexpf(a, b); +} + +// CIR-LABEL: @_Z14test_ldexp_f64Pddi +// CIR: cir.call_llvm_intrinsic "ldexp" {{.*}} : (!cir.double, !s32i) -> !cir.double +// LLVM: define{{.*}} void @_Z14test_ldexp_f64Pddi +// LLVM: call{{.*}} double @llvm.ldexp.f64.i32(double %{{.*}}, i32 %{{.*}}) +__device__ void test_ldexp_f64(double* out, double a, int b) { + *out = __builtin_amdgcn_ldexp(a, b); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
