Author: AbdallahRashed Date: 2026-05-07T09:55:09-07:00 New Revision: 8f45e083ebe0e471d315ad1a4187730081c3dbdc
URL: https://github.com/llvm/llvm-project/commit/8f45e083ebe0e471d315ad1a4187730081c3dbdc DIFF: https://github.com/llvm/llvm-project/commit/8f45e083ebe0e471d315ad1a4187730081c3dbdc.diff LOG: [CIR][CUDA] Implement NVVM math builtins (fabs, ex2_approx) (#195663) Replace errorNYI stubs with actual cir::LLVMIntrinsicCallOp calls for: - __nvvm_fabs_f, __nvvm_fabs_d, __nvvm_fabs_ftz_f (+ f16/bf16 variants) - __nvvm_ex2_approx_f, __nvvm_ex2_approx_d, __nvvm_ex2_approx_ftz_f __nvvm_fabs_d maps to standard llvm.fabs (matching classic CodeGen), while the rest map to their respective nvvm.* intrinsics. Part of #179278 Added: clang/test/CIR/CodeGenCUDA/builtins-nvvm-math.cu Modified: clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp index 23a0dfc579ad2..52f98af8028b4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp @@ -14,10 +14,24 @@ #include "mlir/IR/Value.h" #include "clang/Basic/TargetBuiltins.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" using namespace clang; using namespace clang::CIRGen; +/// Emit a CIR LLVMIntrinsicCallOp for a unary NVVM intrinsic. +/// The result type is inferred from the single argument. +static mlir::Value emitUnaryNVVMIntrinsic(CIRGenFunction &cgf, + const CallExpr *expr, + llvm::StringRef intrinsicName) { + auto &builder = cgf.getBuilder(); + mlir::Value arg = cgf.emitScalarExpr(expr->getArg(0)); + return cir::LLVMIntrinsicCallOp::create( + builder, cgf.getLoc(expr->getExprLoc()), + builder.getStringAttr(intrinsicName), arg.getType(), {arg}) + .getResult(); +} + std::optional<mlir::Value> CIRGenFunction::emitNVPTXBuiltinExpr(unsigned builtinId, const CallExpr *expr) { switch (builtinId) { @@ -774,33 +788,18 @@ CIRGenFunction::emitNVPTXBuiltinExpr(unsigned builtinId, const CallExpr *expr) { case NVPTX::BI__nvvm_abs_bf16x2: case NVPTX::BI__nvvm_fabs_f16: case NVPTX::BI__nvvm_fabs_f16x2: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented NVPTX builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; + return emitUnaryNVVMIntrinsic(*this, expr, "nvvm.fabs"); case NVPTX::BI__nvvm_fabs_ftz_f: case NVPTX::BI__nvvm_fabs_ftz_f16: case NVPTX::BI__nvvm_fabs_ftz_f16x2: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented NVPTX builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; + return emitUnaryNVVMIntrinsic(*this, expr, "nvvm.fabs.ftz"); case NVPTX::BI__nvvm_fabs_d: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented NVPTX builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; + return emitUnaryNVVMIntrinsic(*this, expr, "fabs"); case NVPTX::BI__nvvm_ex2_approx_d: case NVPTX::BI__nvvm_ex2_approx_f: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented NVPTX builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; + return emitUnaryNVVMIntrinsic(*this, expr, "nvvm.ex2.approx"); case NVPTX::BI__nvvm_ex2_approx_ftz_f: - cgm.errorNYI(expr->getSourceRange(), - std::string("unimplemented NVPTX builtin call: ") + - getContext().BuiltinInfo.getName(builtinId)); - return mlir::Value{}; + return emitUnaryNVVMIntrinsic(*this, expr, "nvvm.ex2.approx.ftz"); case NVPTX::BI__nvvm_ldg_h: case NVPTX::BI__nvvm_ldg_h2: cgm.errorNYI(expr->getSourceRange(), diff --git a/clang/test/CIR/CodeGenCUDA/builtins-nvvm-math.cu b/clang/test/CIR/CodeGenCUDA/builtins-nvvm-math.cu new file mode 100644 index 0000000000000..a2f0edb6d073b --- /dev/null +++ b/clang/test/CIR/CodeGenCUDA/builtins-nvvm-math.cu @@ -0,0 +1,65 @@ +#include "Inputs/cuda.h" + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 -x cuda \ +// RUN: -fcuda-is-device -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 -x cuda \ +// RUN: -fcuda-is-device -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 -x cuda \ +// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// FIXME: CIR doesn't propagate the 'contract' fast-math flag to LLVM IR calls +// yet, so LLVM check lines use {{.*}} to tolerate the diff erence between +// CIR (no flags) and classic codegen ('contract'). + +// CIR-LABEL: @_Z11test_fabs_ff +// CIR: cir.call_llvm_intrinsic "nvvm.fabs" {{.*}} : (!cir.float) -> !cir.float +// LLVM-LABEL: @_Z11test_fabs_ff +// LLVM: call {{.*}}float @llvm.nvvm.fabs.f32(float +__device__ float test_fabs_f(float x) { + return __nvvm_fabs_f(x); +} + +// CIR-LABEL: @_Z15test_fabs_ftz_ff +// CIR: cir.call_llvm_intrinsic "nvvm.fabs.ftz" {{.*}} : (!cir.float) -> !cir.float +// LLVM-LABEL: @_Z15test_fabs_ftz_ff +// LLVM: call {{.*}}float @llvm.nvvm.fabs.ftz.f32(float +__device__ float test_fabs_ftz_f(float x) { + return __nvvm_fabs_ftz_f(x); +} + +// CIR-LABEL: @_Z11test_fabs_dd +// CIR: cir.call_llvm_intrinsic "fabs" {{.*}} : (!cir.double) -> !cir.double +// LLVM-LABEL: @_Z11test_fabs_dd +// LLVM: call {{.*}}double @llvm.fabs.f64(double +__device__ double test_fabs_d(double x) { + return __nvvm_fabs_d(x); +} + +// CIR-LABEL: @_Z17test_ex2_approx_ff +// CIR: cir.call_llvm_intrinsic "nvvm.ex2.approx" {{.*}} : (!cir.float) -> !cir.float +// LLVM-LABEL: @_Z17test_ex2_approx_ff +// LLVM: call {{.*}}float @llvm.nvvm.ex2.approx.f32(float +__device__ float test_ex2_approx_f(float x) { + return __nvvm_ex2_approx_f(x); +} + +// CIR-LABEL: @_Z17test_ex2_approx_dd +// CIR: cir.call_llvm_intrinsic "nvvm.ex2.approx" {{.*}} : (!cir.double) -> !cir.double +// LLVM-LABEL: @_Z17test_ex2_approx_dd +// LLVM: call {{.*}}double @llvm.nvvm.ex2.approx.f64(double +__device__ double test_ex2_approx_d(double x) { + return __nvvm_ex2_approx_d(x); +} + +// CIR-LABEL: @_Z21test_ex2_approx_ftz_ff +// CIR: cir.call_llvm_intrinsic "nvvm.ex2.approx.ftz" {{.*}} : (!cir.float) -> !cir.float +// LLVM-LABEL: @_Z21test_ex2_approx_ftz_ff +// LLVM: call {{.*}}float @llvm.nvvm.ex2.approx.ftz.f32(float +__device__ float test_ex2_approx_ftz_f(float x) { + return __nvvm_ex2_approx_ftz_f(x); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
