https://github.com/ayokunle321 created 
https://github.com/llvm/llvm-project/pull/197447

Adds codegen support for the following AMDGPU reciprocal builtins:

- __builtin_amdgcn_rcp (double)
- __builtin_amdgcn_rcpf (float)
- __builtin_amdgcn_rcph (half)
- __builtin_amdgcn_rcp_bf16 (bfloat16)

These are lowered to the corresponding `llvm.amdgcn.rcp` intrinsic.

>From 41452bf3cfc4c5e9308119211c1a165707d8fe6e Mon Sep 17 00:00:00 2001
From: Ayokunle Amodu <[email protected]>
Date: Wed, 13 May 2026 09:35:15 -0400
Subject: [PATCH] add amdgcn rcp builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp |  7 +++--
 .../CodeGenHIP/builtins-amdgcn-gfx1250.hip    | 26 +++++++++++++++++++
 clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip | 20 ++++++++++++++
 3 files changed, 49 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
index 04ab1c29b0d63..9837140b22491 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
@@ -211,10 +211,9 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
   case AMDGPU::BI__builtin_amdgcn_rcpf:
   case AMDGPU::BI__builtin_amdgcn_rcph:
   case AMDGPU::BI__builtin_amdgcn_rcp_bf16: {
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented AMDGPU builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
+    mlir::Value src = emitScalarExpr(expr->getArg(0));
+    return builder.emitIntrinsicCallOp(getLoc(expr->getExprLoc()), 
"amdgcn.rcp",
+                                       src.getType(), mlir::ValueRange{src});
   }
   case AMDGPU::BI__builtin_amdgcn_sqrt:
   case AMDGPU::BI__builtin_amdgcn_sqrtf:
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
new file mode 100644
index 0000000000000..3cb81826a9b01
--- /dev/null
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn-gfx1250.hip
@@ -0,0 +1,26 @@
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
+// RUN:            -target-cpu gfx1250 -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+//===----------------------------------------------------------------------===//
+// Test AMDGPU builtins
+//===----------------------------------------------------------------------===//
+
+// CIR-LABEL: @_Z13test_rcp_bf16PDF16bDF16b
+// CIR: cir.call_llvm_intrinsic "amdgcn.rcp" {{.*}} : (!cir.bf16) -> !cir.bf16
+// LLVM: define{{.*}} void @_Z13test_rcp_bf16PDF16bDF16b
+// LLVM: call{{.*}} bfloat @llvm.amdgcn.rcp.bf16(bfloat %{{.*}})
+__device__ void test_rcp_bf16(__bf16* out, __bf16 a) {
+  *out = __builtin_amdgcn_rcp_bf16(a);
+}
diff --git a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip 
b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
index 4a61fde7aa90c..16155d4374c81 100644
--- a/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
+++ b/clang/test/CIR/CodeGenHIP/builtins-amdgcn.hip
@@ -71,3 +71,23 @@ __device__ void test_div_fmas_f64(double* out, double a, 
double b, double c, int
 __device__ void test_ds_swizzle(int* out, int a) {
   *out = __builtin_amdgcn_ds_swizzle(a, 32);
 }
+
+// CIR-LABEL: @_Z12test_rcp_f32Pff
+// CIR: cir.call_llvm_intrinsic "amdgcn.rcp" {{.*}} : (!cir.float) -> 
!cir.float
+// LLVM: define{{.*}} void @_Z12test_rcp_f32Pff
+// LLVM: call{{.*}} float @llvm.amdgcn.rcp.f32(float %{{.*}})
+// OGCG: define{{.*}} void @_Z12test_rcp_f32Pff
+// OGCG: call{{.*}} float @llvm.amdgcn.rcp.f32(float %{{.*}})
+__device__ void test_rcp_f32(float* out, float a) {
+  *out = __builtin_amdgcn_rcpf(a);
+}
+
+// CIR-LABEL: @_Z12test_rcp_f64Pdd
+// CIR: cir.call_llvm_intrinsic "amdgcn.rcp" {{.*}} : (!cir.double) -> 
!cir.double
+// LLVM: define{{.*}} void @_Z12test_rcp_f64Pdd
+// LLVM: call{{.*}} double @llvm.amdgcn.rcp.f64(double %{{.*}})
+// OGCG: define{{.*}} void @_Z12test_rcp_f64Pdd
+// OGCG: call{{.*}} double @llvm.amdgcn.rcp.f64(double %{{.*}})
+__device__ void test_rcp_f64(double* out, double a) {
+  *out = __builtin_amdgcn_rcp(a);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to