https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/189395
>From 302777cea89b2cfdf6771a08646cff8bd7fb078e Mon Sep 17 00:00:00 2001 From: Tharun V K <[email protected]> Date: Mon, 30 Mar 2026 19:54:27 +0530 Subject: [PATCH 1/4] Enable HasFastHalfType to prevent FP16 promotion and perform FABS/FNEG lowering --- clang/lib/Basic/Targets/X86.cpp | 2 + clang/test/CodeGen/X86/Float16-fneg-fabs.c | 18 +++++++++ llvm/test/CodeGen/X86/half-fneg-fabs.ll | 46 ++++++++++++++++++++++ 3 files changed, 66 insertions(+) create mode 100644 clang/test/CodeGen/X86/Float16-fneg-fabs.c create mode 100644 llvm/test/CodeGen/X86/half-fneg-fabs.ll diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index cb941c94c84a7..b6138fee89b4c 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -471,6 +471,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, SSELevel = std::max(SSELevel, Level); HasFloat16 = SSELevel >= SSE2; + if (HasFloat16) + HasFastHalfType = true; // X86 target has bfloat16 emulation support in the backend, where // bfloat16 is treated as a 32-bit float, arithmetic operations are diff --git a/clang/test/CodeGen/X86/Float16-fneg-fabs.c b/clang/test/CodeGen/X86/Float16-fneg-fabs.c new file mode 100644 index 0000000000000..92760bd377926 --- /dev/null +++ b/clang/test/CodeGen/X86/Float16-fneg-fabs.c @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +sse2 -O0 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +f16c -O0 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK + +_Float16 test_fneg(_Float16 x) { + // CHECK-LABEL: define {{.*}} @test_fneg + // CHECK-NOT: fpext + // CHECK: fneg half + // CHECK-NOT: fptrunc + return -x; +} + +_Float16 test_fabs(_Float16 x) { + // CHECK-LABEL: define {{.*}} @test_fabs + // CHECK-NOT: fpext + // CHECK: call half @llvm.fabs.f16(half + // CHECK-NOT: fptrunc + return __builtin_fabsf16(x); +} diff --git a/llvm/test/CodeGen/X86/half-fneg-fabs.ll b/llvm/test/CodeGen/X86/half-fneg-fabs.ll new file mode 100644 index 0000000000000..d24a5fbe1dfe7 --- /dev/null +++ b/llvm/test/CodeGen/X86/half-fneg-fabs.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c | FileCheck %s --check-prefixes=F16C + +; Test Floating Point Negation (fneg) +define half @test_fneg(half %a) nounwind { +; SSE2-LABEL: test_fneg: +; SSE2: # %bb.0: +; SSE2-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: test_fneg: +; AVX: # %bb.0: +; AVX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq +; +; F16C-LABEL: test_fneg: +; F16C: # %bb.0: +; F16C-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; F16C-NEXT: retq + %res = fneg half %a + ret half %res +} + +; Test Floating Point Absolute Value (fabs) +define half @test_fabs(half %a) nounwind { +; SSE2-LABEL: test_fabs: +; SSE2: # %bb.0: +; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: test_fabs: +; AVX: # %bb.0: +; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: retq +; +; F16C-LABEL: test_fabs: +; F16C: # %bb.0: +; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; F16C-NEXT: retq + %res = call half @llvm.fabs.f16(half %a) + ret half %res +} + +declare half @llvm.fabs.f16(half) >From 897baf0d5bc83e48146292a9381fc43228b486ea Mon Sep 17 00:00:00 2001 From: Tharun V K <[email protected]> Date: Mon, 30 Mar 2026 22:55:07 +0530 Subject: [PATCH 2/4] Added fast-isel checks to the test --- llvm/test/CodeGen/X86/half-fneg-fabs.ll | 33 +++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/llvm/test/CodeGen/X86/half-fneg-fabs.ll b/llvm/test/CodeGen/X86/half-fneg-fabs.ll index d24a5fbe1dfe7..98d48fe771adf 100644 --- a/llvm/test/CodeGen/X86/half-fneg-fabs.ll +++ b/llvm/test/CodeGen/X86/half-fneg-fabs.ll @@ -1,7 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -fast-isel | FileCheck %s --check-prefixes=SSE2-FAST ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx -fast-isel | FileCheck %s --check-prefixes=AVX-FAST ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c | FileCheck %s --check-prefixes=F16C +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fast-isel | FileCheck %s --check-prefixes=F16C-FAST ; Test Floating Point Negation (fneg) define half @test_fneg(half %a) nounwind { @@ -10,15 +13,30 @@ define half @test_fneg(half %a) nounwind { ; SSE2-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: retq ; +; SSE2-FAST-LABEL: test_fneg: +; SSE2-FAST: # %bb.0: +; SSE2-FAST-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-FAST-NEXT: retq +; ; AVX-LABEL: test_fneg: ; AVX: # %bb.0: ; AVX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq ; +; AVX-FAST-LABEL: test_fneg: +; AVX-FAST: # %bb.0: +; AVX-FAST-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-FAST-NEXT: retq +; ; F16C-LABEL: test_fneg: ; F16C: # %bb.0: ; F16C-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; F16C-NEXT: retq +; +; F16C-FAST-LABEL: test_fneg: +; F16C-FAST: # %bb.0: +; F16C-FAST-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; F16C-FAST-NEXT: retq %res = fneg half %a ret half %res } @@ -30,15 +48,30 @@ define half @test_fabs(half %a) nounwind { ; SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: retq ; +; SSE2-FAST-LABEL: test_fabs: +; SSE2-FAST: # %bb.0: +; SSE2-FAST-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-FAST-NEXT: retq +; ; AVX-LABEL: test_fabs: ; AVX: # %bb.0: ; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq ; +; AVX-FAST-LABEL: test_fabs: +; AVX-FAST: # %bb.0: +; AVX-FAST-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-FAST-NEXT: retq +; ; F16C-LABEL: test_fabs: ; F16C: # %bb.0: ; F16C-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; F16C-NEXT: retq +; +; F16C-FAST-LABEL: test_fabs: +; F16C-FAST: # %bb.0: +; F16C-FAST-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; F16C-FAST-NEXT: retq %res = call half @llvm.fabs.f16(half %a) ret half %res } >From 2a7ae8ef29a7d1ca45f651532d2da5498e73614c Mon Sep 17 00:00:00 2001 From: Tharun V K <[email protected]> Date: Mon, 6 Apr 2026 15:06:54 +0530 Subject: [PATCH 3/4] Fold fptrunc(fneg/fabs(fpext(x))) for f16 to avoid f32 promotion --- clang/lib/Basic/Targets/X86.cpp | 2 -- clang/test/CodeGen/X86/Float16-fneg-fabs.c | 18 ------------------ llvm/lib/Target/X86/X86ISelLowering.cpp | 9 +++++++++ llvm/test/CodeGen/X86/half-fneg-fabs.ll | 14 +++++++------- 4 files changed, 16 insertions(+), 27 deletions(-) delete mode 100644 clang/test/CodeGen/X86/Float16-fneg-fabs.c diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index b6138fee89b4c..cb941c94c84a7 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -471,8 +471,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, SSELevel = std::max(SSELevel, Level); HasFloat16 = SSELevel >= SSE2; - if (HasFloat16) - HasFastHalfType = true; // X86 target has bfloat16 emulation support in the backend, where // bfloat16 is treated as a 32-bit float, arithmetic operations are diff --git a/clang/test/CodeGen/X86/Float16-fneg-fabs.c b/clang/test/CodeGen/X86/Float16-fneg-fabs.c deleted file mode 100644 index 92760bd377926..0000000000000 --- a/clang/test/CodeGen/X86/Float16-fneg-fabs.c +++ /dev/null @@ -1,18 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +sse2 -O0 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +f16c -O0 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK - -_Float16 test_fneg(_Float16 x) { - // CHECK-LABEL: define {{.*}} @test_fneg - // CHECK-NOT: fpext - // CHECK: fneg half - // CHECK-NOT: fptrunc - return -x; -} - -_Float16 test_fabs(_Float16 x) { - // CHECK-LABEL: define {{.*}} @test_fabs - // CHECK-NOT: fpext - // CHECK: call half @llvm.fabs.f16(half - // CHECK-NOT: fptrunc - return __builtin_fabsf16(x); -} diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 76bb9db5c5b85..82a8f0d1098d5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23034,6 +23034,15 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); MVT SVT = In.getSimpleValueType(); + if (!IsStrict && VT == MVT::f16 && + (In.getOpcode() == ISD::FNEG || In.getOpcode() == ISD::FABS)) { + SDValue Inner = In.getOperand(0); + if (Inner.getOpcode() == ISD::FP_EXTEND && + Inner.getOperand(0).getValueType() == MVT::f16) { + return DAG.getNode(In.getOpcode(), DL, MVT::f16, Inner.getOperand(0)); + } + } + if (SVT == MVT::f128 || (VT == MVT::f16 && SVT == MVT::f80)) return SDValue(); diff --git a/llvm/test/CodeGen/X86/half-fneg-fabs.ll b/llvm/test/CodeGen/X86/half-fneg-fabs.ll index 98d48fe771adf..ea461a0bc741a 100644 --- a/llvm/test/CodeGen/X86/half-fneg-fabs.ll +++ b/llvm/test/CodeGen/X86/half-fneg-fabs.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -fast-isel | FileCheck %s --check-prefixes=SSE2-FAST ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=AVX @@ -6,7 +6,6 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c | FileCheck %s --check-prefixes=F16C ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fast-isel | FileCheck %s --check-prefixes=F16C-FAST -; Test Floating Point Negation (fneg) define half @test_fneg(half %a) nounwind { ; SSE2-LABEL: test_fneg: ; SSE2: # %bb.0: @@ -37,11 +36,12 @@ define half @test_fneg(half %a) nounwind { ; F16C-FAST: # %bb.0: ; F16C-FAST-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; F16C-FAST-NEXT: retq - %res = fneg half %a + %ext = fpext half %a to float + %neg = fneg float %ext + %res = fptrunc float %neg to half ret half %res } -; Test Floating Point Absolute Value (fabs) define half @test_fabs(half %a) nounwind { ; SSE2-LABEL: test_fabs: ; SSE2: # %bb.0: @@ -72,8 +72,8 @@ define half @test_fabs(half %a) nounwind { ; F16C-FAST: # %bb.0: ; F16C-FAST-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; F16C-FAST-NEXT: retq - %res = call half @llvm.fabs.f16(half %a) + %ext = fpext half %a to float + %abs = call float @llvm.fabs.f32(float %ext) + %res = fptrunc float %abs to half ret half %res } - -declare half @llvm.fabs.f16(half) >From dd7c610f02e6e715fa5e68fed961eaa7006d70de Mon Sep 17 00:00:00 2001 From: Tharun V K <[email protected]> Date: Mon, 6 Apr 2026 16:49:32 +0530 Subject: [PATCH 4/4] Handle folding early in combineFP_ROUND --- llvm/lib/Target/X86/X86ISelLowering.cpp | 27 +++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 82a8f0d1098d5..666ad075ff5f5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23034,15 +23034,6 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); MVT SVT = In.getSimpleValueType(); - if (!IsStrict && VT == MVT::f16 && - (In.getOpcode() == ISD::FNEG || In.getOpcode() == ISD::FABS)) { - SDValue Inner = In.getOperand(0); - if (Inner.getOpcode() == ISD::FP_EXTEND && - Inner.getOperand(0).getValueType() == MVT::f16) { - return DAG.getNode(In.getOpcode(), DL, MVT::f16, Inner.getOperand(0)); - } - } - if (SVT == MVT::f128 || (VT == MVT::f16 && SVT == MVT::f80)) return SDValue(); @@ -62015,20 +62006,30 @@ static SDValue combineBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG, static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - if (!Subtarget.hasF16C() || Subtarget.useSoftFloat()) - return SDValue(); bool IsStrict = N->isStrictFPOpcode(); EVT VT = N->getValueType(0); SDValue Src = N->getOperand(IsStrict ? 1 : 0); + SDLoc dl(N); + + if (!IsStrict && VT == MVT::f16 && + (Src.getOpcode() == ISD::FNEG || Src.getOpcode() == ISD::FABS)) { + SDValue Inner = Src.getOperand(0); + if (Inner.getOpcode() == ISD::FP_EXTEND && + Inner.getOperand(0).getValueType() == MVT::f16) { + return DAG.getNode(Src.getOpcode(), dl, MVT::f16, Inner.getOperand(0)); + } + } + + if (!Subtarget.hasF16C() || Subtarget.useSoftFloat()) + return SDValue(); + EVT SrcVT = Src.getValueType(); if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 || SrcVT.getVectorElementType() != MVT::f32) return SDValue(); - SDLoc dl(N); - SDValue Cvt, Chain; unsigned NumElts = VT.getVectorNumElements(); if (Subtarget.hasFP16()) { _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
