[clang] 8f45e08 - [CIR][CUDA] Implement NVVM math builtins (fabs, ex2_approx) (#195663)

via cfe-commits Thu, 07 May 2026 09:55:29 -0700

Author: AbdallahRashed
Date: 2026-05-07T09:55:09-07:00
New Revision: 8f45e083ebe0e471d315ad1a4187730081c3dbdc


URL: 
https://github.com/llvm/llvm-project/commit/8f45e083ebe0e471d315ad1a4187730081c3dbdc
DIFF: 
https://github.com/llvm/llvm-project/commit/8f45e083ebe0e471d315ad1a4187730081c3dbdc.diff

LOG: [CIR][CUDA] Implement NVVM math builtins (fabs, ex2_approx) (#195663)

Replace errorNYI stubs with actual cir::LLVMIntrinsicCallOp calls for:
- __nvvm_fabs_f, __nvvm_fabs_d, __nvvm_fabs_ftz_f (+ f16/bf16 variants)
- __nvvm_ex2_approx_f, __nvvm_ex2_approx_d, __nvvm_ex2_approx_ftz_f

__nvvm_fabs_d maps to standard llvm.fabs (matching classic CodeGen),
while the rest map to their respective nvvm.* intrinsics.

Part of #179278

Added: 
    clang/test/CIR/CodeGenCUDA/builtins-nvvm-math.cu

Modified: 
    clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp
index 23a0dfc579ad2..52f98af8028b4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp
@@ -14,10 +14,24 @@
 
 #include "mlir/IR/Value.h"
 #include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
 
 using namespace clang;
 using namespace clang::CIRGen;
 
+/// Emit a CIR LLVMIntrinsicCallOp for a unary NVVM intrinsic.
+/// The result type is inferred from the single argument.
+static mlir::Value emitUnaryNVVMIntrinsic(CIRGenFunction &cgf,
+                                          const CallExpr *expr,
+                                          llvm::StringRef intrinsicName) {
+  auto &builder = cgf.getBuilder();
+  mlir::Value arg = cgf.emitScalarExpr(expr->getArg(0));
+  return cir::LLVMIntrinsicCallOp::create(
+             builder, cgf.getLoc(expr->getExprLoc()),
+             builder.getStringAttr(intrinsicName), arg.getType(), {arg})
+      .getResult();
+}
+
 std::optional<mlir::Value>
 CIRGenFunction::emitNVPTXBuiltinExpr(unsigned builtinId, const CallExpr *expr) 
{
   switch (builtinId) {
@@ -774,33 +788,18 @@ CIRGenFunction::emitNVPTXBuiltinExpr(unsigned builtinId, 
const CallExpr *expr) {
   case NVPTX::BI__nvvm_abs_bf16x2:
   case NVPTX::BI__nvvm_fabs_f16:
   case NVPTX::BI__nvvm_fabs_f16x2:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented NVPTX builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
+    return emitUnaryNVVMIntrinsic(*this, expr, "nvvm.fabs");
   case NVPTX::BI__nvvm_fabs_ftz_f:
   case NVPTX::BI__nvvm_fabs_ftz_f16:
   case NVPTX::BI__nvvm_fabs_ftz_f16x2:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented NVPTX builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
+    return emitUnaryNVVMIntrinsic(*this, expr, "nvvm.fabs.ftz");
   case NVPTX::BI__nvvm_fabs_d:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented NVPTX builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
+    return emitUnaryNVVMIntrinsic(*this, expr, "fabs");
   case NVPTX::BI__nvvm_ex2_approx_d:
   case NVPTX::BI__nvvm_ex2_approx_f:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented NVPTX builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
+    return emitUnaryNVVMIntrinsic(*this, expr, "nvvm.ex2.approx");
   case NVPTX::BI__nvvm_ex2_approx_ftz_f:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented NVPTX builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinId));
-    return mlir::Value{};
+    return emitUnaryNVVMIntrinsic(*this, expr, "nvvm.ex2.approx.ftz");
   case NVPTX::BI__nvvm_ldg_h:
   case NVPTX::BI__nvvm_ldg_h2:
     cgm.errorNYI(expr->getSourceRange(),

diff  --git a/clang/test/CIR/CodeGenCUDA/builtins-nvvm-math.cu 
b/clang/test/CIR/CodeGenCUDA/builtins-nvvm-math.cu
new file mode 100644
index 0000000000000..a2f0edb6d073b
--- /dev/null
+++ b/clang/test/CIR/CodeGenCUDA/builtins-nvvm-math.cu
@@ -0,0 +1,65 @@
+#include "Inputs/cuda.h"
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 -x cuda \
+// RUN:            -fcuda-is-device -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 -x cuda \
+// RUN:            -fcuda-is-device -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 -x cuda \
+// RUN:            -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// FIXME: CIR doesn't propagate the 'contract' fast-math flag to LLVM IR calls
+// yet, so LLVM check lines use {{.*}} to tolerate the 
diff erence between
+// CIR (no flags) and classic codegen ('contract').
+
+// CIR-LABEL: @_Z11test_fabs_ff
+// CIR: cir.call_llvm_intrinsic "nvvm.fabs" {{.*}} : (!cir.float) -> !cir.float
+// LLVM-LABEL: @_Z11test_fabs_ff
+// LLVM: call {{.*}}float @llvm.nvvm.fabs.f32(float
+__device__ float test_fabs_f(float x) {
+  return __nvvm_fabs_f(x);
+}
+
+// CIR-LABEL: @_Z15test_fabs_ftz_ff
+// CIR: cir.call_llvm_intrinsic "nvvm.fabs.ftz" {{.*}} : (!cir.float) -> 
!cir.float
+// LLVM-LABEL: @_Z15test_fabs_ftz_ff
+// LLVM: call {{.*}}float @llvm.nvvm.fabs.ftz.f32(float
+__device__ float test_fabs_ftz_f(float x) {
+  return __nvvm_fabs_ftz_f(x);
+}
+
+// CIR-LABEL: @_Z11test_fabs_dd
+// CIR: cir.call_llvm_intrinsic "fabs" {{.*}} : (!cir.double) -> !cir.double
+// LLVM-LABEL: @_Z11test_fabs_dd
+// LLVM: call {{.*}}double @llvm.fabs.f64(double
+__device__ double test_fabs_d(double x) {
+  return __nvvm_fabs_d(x);
+}
+
+// CIR-LABEL: @_Z17test_ex2_approx_ff
+// CIR: cir.call_llvm_intrinsic "nvvm.ex2.approx" {{.*}} : (!cir.float) -> 
!cir.float
+// LLVM-LABEL: @_Z17test_ex2_approx_ff
+// LLVM: call {{.*}}float @llvm.nvvm.ex2.approx.f32(float
+__device__ float test_ex2_approx_f(float x) {
+  return __nvvm_ex2_approx_f(x);
+}
+
+// CIR-LABEL: @_Z17test_ex2_approx_dd
+// CIR: cir.call_llvm_intrinsic "nvvm.ex2.approx" {{.*}} : (!cir.double) -> 
!cir.double
+// LLVM-LABEL: @_Z17test_ex2_approx_dd
+// LLVM: call {{.*}}double @llvm.nvvm.ex2.approx.f64(double
+__device__ double test_ex2_approx_d(double x) {
+  return __nvvm_ex2_approx_d(x);
+}
+
+// CIR-LABEL: @_Z21test_ex2_approx_ftz_ff
+// CIR: cir.call_llvm_intrinsic "nvvm.ex2.approx.ftz" {{.*}} : (!cir.float) -> 
!cir.float
+// LLVM-LABEL: @_Z21test_ex2_approx_ftz_ff
+// LLVM: call {{.*}}float @llvm.nvvm.ex2.approx.ftz.f32(float
+__device__ float test_ex2_approx_ftz_f(float x) {
+  return __nvvm_ex2_approx_ftz_f(x);
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 8f45e08 - [CIR][CUDA] Implement NVVM math builtins (fabs, ex2_approx) (#195663)

Reply via email to