https://github.com/spavloff created 
https://github.com/llvm/llvm-project/pull/180480

Previously, the strictfp variants of rounding operations (FLOOR, ROUND, etc.) 
were handled in SelectionDAG via the default expansion, which splits a vector 
operation into scalar ones. This results in less efficient code.

This change declares the strictfp counterparts of the vector rounding 
operations as legal and modifies existing rules in tablegen descriptions 
accordingly.

>From fe7377442a7cc461af0a29baeb421fc910fe25bd Mon Sep 17 00:00:00 2001
From: Serge Pavlov <[email protected]>
Date: Thu, 5 Feb 2026 14:29:57 +0700
Subject: [PATCH] [ARM] Treat strictfp vector rounding operations as legal

Previously, the strictfp variants of rounding operations (FLOOR, ROUND,
etc.) were handled in SelectionDAG via the default expansion, which
splits a vector operation into scalar ones. This results in less
efficient code.

This change declares the strictfp counterparts of the vector rounding
operations as legal and modifies existing rules in tablegen descriptions
accordingly.
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp       | 20 ++++++
 llvm/lib/Target/ARM/ARMInstrMVE.td            | 12 ++--
 llvm/lib/Target/ARM/ARMInstrNEON.td           | 12 ++--
 .../CodeGen/ARM/fp-intrinsics-vector-v8.ll    | 65 +++++++------------
 .../mve-intrinsics/strict-intrinsics.ll       | 17 +----
 5 files changed, 58 insertions(+), 68 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp 
b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index b6a66d68fe5f2..86fbb1460f492 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -379,6 +379,13 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
       setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
 
+      setOperationAction(ISD::STRICT_FROUND, VT, Legal);
+      setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
+      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+      setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+      setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+      setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+
       // No native support for these.
       setOperationAction(ISD::FDIV, VT, Expand);
       setOperationAction(ISD::FREM, VT, Expand);
@@ -1356,6 +1363,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine 
&TM_,
       setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
       setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
       setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+
+      setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f32, Legal);
+      setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FROUND, MVT::v2f32, Legal);
+      setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::v2f32, Legal);
+      setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FCEIL, MVT::v2f32, Legal);
+      setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f32, Legal);
+      setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FRINT, MVT::v2f32, Legal);
+      setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
     }
 
     if (Subtarget->hasFullFP16()) {
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td 
b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 1e9c141f13f83..8f874fed991f1 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3605,12 +3605,12 @@ multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string 
suffix, bits<3> opcode,
 }
 
 multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> {
-  defm N : MVE_VRINT_m<VTI, "n", 0b000, froundeven>;
-  defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;
-  defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;
-  defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;
-  defm M : MVE_VRINT_m<VTI, "m", 0b101, ffloor>;
-  defm P : MVE_VRINT_m<VTI, "p", 0b111, fceil>;
+  defm N : MVE_VRINT_m<VTI, "n", 0b000, any_froundeven>;
+  defm X : MVE_VRINT_m<VTI, "x", 0b001, any_frint>;
+  defm A : MVE_VRINT_m<VTI, "a", 0b010, any_fround>;
+  defm Z : MVE_VRINT_m<VTI, "z", 0b011, any_ftrunc>;
+  defm M : MVE_VRINT_m<VTI, "m", 0b101, any_ffloor>;
+  defm P : MVE_VRINT_m<VTI, "p", 0b111, any_fceil>;
 }
 
 defm MVE_VRINTf16 : MVE_VRINT_ops<MVE_v8f16>;
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td 
b/llvm/lib/Target/ARM/ARMInstrNEON.td
index e2450490c83b8..f31938bb43d16 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -7349,12 +7349,12 @@ multiclass VRINT_FPI<string op, bits<3> op9_7, 
SDPatternOperator Int> {
   }
 }
 
-defm VRINTNN : VRINT_FPI<"n", 0b000, froundeven>;
-defm VRINTXN : VRINT_FPI<"x", 0b001, frint>;
-defm VRINTAN : VRINT_FPI<"a", 0b010, fround>;
-defm VRINTZN : VRINT_FPI<"z", 0b011, ftrunc>;
-defm VRINTMN : VRINT_FPI<"m", 0b101, ffloor>;
-defm VRINTPN : VRINT_FPI<"p", 0b111, fceil>;
+defm VRINTNN : VRINT_FPI<"n", 0b000, any_froundeven>;
+defm VRINTXN : VRINT_FPI<"x", 0b001, any_frint>;
+defm VRINTAN : VRINT_FPI<"a", 0b010, any_fround>;
+defm VRINTZN : VRINT_FPI<"z", 0b011, any_ftrunc>;
+defm VRINTMN : VRINT_FPI<"m", 0b101, any_ffloor>;
+defm VRINTPN : VRINT_FPI<"p", 0b111, any_fceil>;
 
 // Cryptography instructions
 let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll 
b/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll
index 061834ab4eaf8..8cf3d40795f86 100644
--- a/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll
+++ b/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll
@@ -21,14 +21,11 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %x) #0 {
 define <4 x float> @rint_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: rint_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov d1, r2, r3
-; CHECK-NEXT:    vmov d0, r0, r1
-; CHECK-NEXT:    vrintx.f32 s7, s3
-; CHECK-NEXT:    vrintx.f32 s6, s2
-; CHECK-NEXT:    vrintx.f32 s5, s1
-; CHECK-NEXT:    vrintx.f32 s4, s0
-; CHECK-NEXT:    vmov r2, r3, d3
-; CHECK-NEXT:    vmov r0, r1, d2
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vrintx.f32 q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x 
float> %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
   ret <4 x float> %val
@@ -37,14 +34,11 @@ define <4 x float> @rint_v4f32(<4 x float> %x) #0 {
 define <4 x float> @round_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: round_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov d1, r2, r3
-; CHECK-NEXT:    vmov d0, r0, r1
-; CHECK-NEXT:    vrinta.f32 s7, s3
-; CHECK-NEXT:    vrinta.f32 s6, s2
-; CHECK-NEXT:    vrinta.f32 s5, s1
-; CHECK-NEXT:    vrinta.f32 s4, s0
-; CHECK-NEXT:    vmov r2, r3, d3
-; CHECK-NEXT:    vmov r0, r1, d2
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vrinta.f32 q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x 
float> %x, metadata !"fpexcept.strict") #0
   ret <4 x float> %val
@@ -53,14 +47,11 @@ define <4 x float> @round_v4f32(<4 x float> %x) #0 {
 define <4 x float> @roundeven_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: roundeven_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov d1, r2, r3
-; CHECK-NEXT:    vmov d0, r0, r1
-; CHECK-NEXT:    vrintn.f32 s7, s3
-; CHECK-NEXT:    vrintn.f32 s6, s2
-; CHECK-NEXT:    vrintn.f32 s5, s1
-; CHECK-NEXT:    vrintn.f32 s4, s0
-; CHECK-NEXT:    vmov r2, r3, d3
-; CHECK-NEXT:    vmov r0, r1, d2
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vrintn.f32 q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x 
float> %x, metadata !"fpexcept.strict") #0
   ret <4 x float> %val
@@ -69,14 +60,11 @@ define <4 x float> @roundeven_v4f32(<4 x float> %x) #0 {
 define <4 x float> @floor_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: floor_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov d1, r2, r3
-; CHECK-NEXT:    vmov d0, r0, r1
-; CHECK-NEXT:    vrintm.f32 s7, s3
-; CHECK-NEXT:    vrintm.f32 s6, s2
-; CHECK-NEXT:    vrintm.f32 s5, s1
-; CHECK-NEXT:    vrintm.f32 s4, s0
-; CHECK-NEXT:    vmov r2, r3, d3
-; CHECK-NEXT:    vmov r0, r1, d2
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vrintm.f32 q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x 
float> %x, metadata !"fpexcept.strict") #0
   ret <4 x float> %val
@@ -85,14 +73,11 @@ define <4 x float> @floor_v4f32(<4 x float> %x) #0 {
 define <4 x float> @ceil_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: ceil_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov d1, r2, r3
-; CHECK-NEXT:    vmov d0, r0, r1
-; CHECK-NEXT:    vrintp.f32 s7, s3
-; CHECK-NEXT:    vrintp.f32 s6, s2
-; CHECK-NEXT:    vrintp.f32 s5, s1
-; CHECK-NEXT:    vrintp.f32 s4, s0
-; CHECK-NEXT:    vmov r2, r3, d3
-; CHECK-NEXT:    vmov r0, r1, d2
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vrintp.f32 q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x 
float> %x, metadata !"fpexcept.strict") #0
   ret <4 x float> %val
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll 
b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll
index 18a3fbc26219a..8dd02491d7040 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-intrinsics.ll
@@ -343,22 +343,7 @@ entry:
 define arm_aapcs_vfpcc <8 x half> @test_roundeven_f16(<8 x half> %a) #0 {
 ; CHECK-LABEL: test_roundeven_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmovx.f16 s4, s0
-; CHECK-NEXT:    vrintn.f16 s0, s0
-; CHECK-NEXT:    vrintn.f16 s4, s4
-; CHECK-NEXT:    vins.f16 s0, s4
-; CHECK-NEXT:    vmovx.f16 s4, s1
-; CHECK-NEXT:    vrintn.f16 s4, s4
-; CHECK-NEXT:    vrintn.f16 s1, s1
-; CHECK-NEXT:    vins.f16 s1, s4
-; CHECK-NEXT:    vmovx.f16 s4, s2
-; CHECK-NEXT:    vrintn.f16 s4, s4
-; CHECK-NEXT:    vrintn.f16 s2, s2
-; CHECK-NEXT:    vins.f16 s2, s4
-; CHECK-NEXT:    vmovx.f16 s4, s3
-; CHECK-NEXT:    vrintn.f16 s4, s4
-; CHECK-NEXT:    vrintn.f16 s3, s3
-; CHECK-NEXT:    vins.f16 s3, s4
+; CHECK-NEXT:    vrintn.f16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = tail call <8 x half> @llvm.experimental.constrained.roundeven.v8f16(<8 
x half> %a, metadata !"fpexcept.strict")

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to