https://github.com/spavloff created https://github.com/llvm/llvm-project/pull/180480
Previously, the strictfp variants of rounding operations (FLOOR, ROUND, etc.) were handled in SelectionDAG via the default expansion, which splits each vector operation into scalar ones. This results in less efficient code. This change declares the strictfp counterparts of the vector rounding operations as legal and modifies existing rules in tablegen descriptions accordingly. From fe7377442a7cc461af0a29baeb421fc910fe25bd Mon Sep 17 00:00:00 2001 From: Serge Pavlov <[email protected]> Date: Thu, 5 Feb 2026 14:29:57 +0700 Subject: [PATCH] [ARM] Treat strictfp vector rounding operations as legal Previously, the strictfp variants of rounding operations (FLOOR, ROUND, etc.) were handled in SelectionDAG via the default expansion, which splits each vector operation into scalar ones. This results in less efficient code. This change declares the strictfp counterparts of the vector rounding operations as legal and modifies existing rules in tablegen descriptions accordingly. --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 20 ++++++ llvm/lib/Target/ARM/ARMInstrMVE.td | 12 ++-- llvm/lib/Target/ARM/ARMInstrNEON.td | 12 ++-- .../CodeGen/ARM/fp-intrinsics-vector-v8.ll | 65 +++++++------------ .../mve-intrinsics/strict-intrinsics.ll | 17 +---- 5 files changed, 58 insertions(+), 68 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index b6a66d68fe5f2..86fbb1460f492 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -379,6 +379,13 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom); + setOperationAction(ISD::STRICT_FROUND, VT, Legal); + setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); + setOperationAction(ISD::STRICT_FRINT, VT, Legal); + setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); + setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); + 
setOperationAction(ISD::STRICT_FCEIL, VT, Legal); + // No native support for these. setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); @@ -1356,6 +1363,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_, setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); setOperationAction(ISD::FRINT, MVT::v2f32, Legal); setOperationAction(ISD::FRINT, MVT::v4f32, Legal); + + setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f32, Legal); + setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::v2f32, Legal); + setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::v2f32, Legal); + setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FCEIL, MVT::v2f32, Legal); + setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f32, Legal); + setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v2f32, Legal); + setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); } if (Subtarget->hasFullFP16()) { diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 1e9c141f13f83..8f874fed991f1 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -3605,12 +3605,12 @@ multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode, } multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> { - defm N : MVE_VRINT_m<VTI, "n", 0b000, froundeven>; - defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>; - defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>; - defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>; - defm M : MVE_VRINT_m<VTI, "m", 0b101, ffloor>; - defm P : MVE_VRINT_m<VTI, "p", 0b111, fceil>; + defm N : MVE_VRINT_m<VTI, "n", 0b000, any_froundeven>; + defm X : MVE_VRINT_m<VTI, "x", 0b001, any_frint>; + defm A : MVE_VRINT_m<VTI, "a", 0b010, any_fround>; + 
defm Z : MVE_VRINT_m<VTI, "z", 0b011, any_ftrunc>; + defm M : MVE_VRINT_m<VTI, "m", 0b101, any_ffloor>; + defm P : MVE_VRINT_m<VTI, "p", 0b111, any_fceil>; } defm MVE_VRINTf16 : MVE_VRINT_ops<MVE_v8f16>; diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index e2450490c83b8..f31938bb43d16 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -7349,12 +7349,12 @@ multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> { } } -defm VRINTNN : VRINT_FPI<"n", 0b000, froundeven>; -defm VRINTXN : VRINT_FPI<"x", 0b001, frint>; -defm VRINTAN : VRINT_FPI<"a", 0b010, fround>; -defm VRINTZN : VRINT_FPI<"z", 0b011, ftrunc>; -defm VRINTMN : VRINT_FPI<"m", 0b101, ffloor>; -defm VRINTPN : VRINT_FPI<"p", 0b111, fceil>; +defm VRINTNN : VRINT_FPI<"n", 0b000, any_froundeven>; +defm VRINTXN : VRINT_FPI<"x", 0b001, any_frint>; +defm VRINTAN : VRINT_FPI<"a", 0b010, any_fround>; +defm VRINTZN : VRINT_FPI<"z", 0b011, any_ftrunc>; +defm VRINTMN : VRINT_FPI<"m", 0b101, any_ffloor>; +defm VRINTPN : VRINT_FPI<"p", 0b111, any_fceil>; // Cryptography instructions let PostEncoderMethod = "NEONThumb2DataIPostEncoder", diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll b/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll index 061834ab4eaf8..8cf3d40795f86 100644 --- a/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll +++ b/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll @@ -21,14 +21,11 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %x) #0 { define <4 x float> @rint_v4f32(<4 x float> %x) #0 { ; CHECK-LABEL: rint_v4f32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov d1, r2, r3 -; CHECK-NEXT: vmov d0, r0, r1 -; CHECK-NEXT: vrintx.f32 s7, s3 -; CHECK-NEXT: vrintx.f32 s6, s2 -; CHECK-NEXT: vrintx.f32 s5, s1 -; CHECK-NEXT: vrintx.f32 s4, s0 -; CHECK-NEXT: vmov r2, r3, d3 -; CHECK-NEXT: vmov r0, r1, d2 +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vrintx.f32 q8, q8 +; CHECK-NEXT: 
vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: bx lr %val = call <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float> %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 ret <4 x float> %val @@ -37,14 +34,11 @@ define <4 x float> @rint_v4f32(<4 x float> %x) #0 { define <4 x float> @round_v4f32(<4 x float> %x) #0 { ; CHECK-LABEL: round_v4f32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov d1, r2, r3 -; CHECK-NEXT: vmov d0, r0, r1 -; CHECK-NEXT: vrinta.f32 s7, s3 -; CHECK-NEXT: vrinta.f32 s6, s2 -; CHECK-NEXT: vrinta.f32 s5, s1 -; CHECK-NEXT: vrinta.f32 s4, s0 -; CHECK-NEXT: vmov r2, r3, d3 -; CHECK-NEXT: vmov r0, r1, d2 +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vrinta.f32 q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: bx lr %val = call <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0 ret <4 x float> %val @@ -53,14 +47,11 @@ define <4 x float> @round_v4f32(<4 x float> %x) #0 { define <4 x float> @roundeven_v4f32(<4 x float> %x) #0 { ; CHECK-LABEL: roundeven_v4f32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov d1, r2, r3 -; CHECK-NEXT: vmov d0, r0, r1 -; CHECK-NEXT: vrintn.f32 s7, s3 -; CHECK-NEXT: vrintn.f32 s6, s2 -; CHECK-NEXT: vrintn.f32 s5, s1 -; CHECK-NEXT: vrintn.f32 s4, s0 -; CHECK-NEXT: vmov r2, r3, d3 -; CHECK-NEXT: vmov r0, r1, d2 +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vrintn.f32 q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: bx lr %val = call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0 ret <4 x float> %val @@ -69,14 +60,11 @@ define <4 x float> @roundeven_v4f32(<4 x float> %x) #0 { define <4 x float> @floor_v4f32(<4 x float> %x) #0 { ; CHECK-LABEL: floor_v4f32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov d1, r2, r3 -; CHECK-NEXT: vmov d0, r0, r1 -; CHECK-NEXT: vrintm.f32 s7, 
s3 -; CHECK-NEXT: vrintm.f32 s6, s2 -; CHECK-NEXT: vrintm.f32 s5, s1 -; CHECK-NEXT: vrintm.f32 s4, s0 -; CHECK-NEXT: vmov r2, r3, d3 -; CHECK-NEXT: vmov r0, r1, d2 +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vrintm.f32 q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: bx lr %val = call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0 ret <4 x float> %val @@ -85,14 +73,11 @@ define <4 x float> @floor_v4f32(<4 x float> %x) #0 { define <4 x float> @ceil_v4f32(<4 x float> %x) #0 { ; CHECK-LABEL: ceil_v4f32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov d1, r2, r3 -; CHECK-NEXT: vmov d0, r0, r1 -; CHECK-NEXT: vrintp.f32 s7, s3 -; CHECK-NEXT: vrintp.f32 s6, s2 -; CHECK-NEXT: vrintp.f32 s5, s1 -; CHECK-NEXT: vrintp.f32 s4, s0 -; CHECK-NEXT: vmov r2, r3, d3 -; CHECK-NEXT: vmov r0, r1, d2 +; CHECK-NEXT: vmov d17, r2, r3 +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vrintp.f32 q8, q8 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 ; CHECK-NEXT: bx lr %val = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0 ret <4 x float> %val diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll index 18a3fbc26219a..8dd02491d7040 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll @@ -343,22 +343,7 @@ entry: define arm_aapcs_vfpcc <8 x half> @test_roundeven_f16(<8 x half> %a) #0 { ; CHECK-LABEL: test_roundeven_f16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovx.f16 s4, s0 -; CHECK-NEXT: vrintn.f16 s0, s0 -; CHECK-NEXT: vrintn.f16 s4, s4 -; CHECK-NEXT: vins.f16 s0, s4 -; CHECK-NEXT: vmovx.f16 s4, s1 -; CHECK-NEXT: vrintn.f16 s4, s4 -; CHECK-NEXT: vrintn.f16 s1, s1 -; CHECK-NEXT: vins.f16 s1, s4 -; CHECK-NEXT: vmovx.f16 s4, s2 -; 
CHECK-NEXT: vrintn.f16 s4, s4 -; CHECK-NEXT: vrintn.f16 s2, s2 -; CHECK-NEXT: vins.f16 s2, s4 -; CHECK-NEXT: vmovx.f16 s4, s3 -; CHECK-NEXT: vrintn.f16 s4, s4 -; CHECK-NEXT: vrintn.f16 s3, s3 -; CHECK-NEXT: vins.f16 s3, s4 +; CHECK-NEXT: vrintn.f16 q0, q0 ; CHECK-NEXT: bx lr entry: %0 = tail call <8 x half> @llvm.experimental.constrained.roundeven.v8f16(<8 x half> %a, metadata !"fpexcept.strict") _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
