https://github.com/spavloff updated 
https://github.com/llvm/llvm-project/pull/180480

>From 515a46e6d2576a57443281cf3a4e319380263af2 Mon Sep 17 00:00:00 2001
From: Serge Pavlov <[email protected]>
Date: Thu, 5 Feb 2026 14:29:57 +0700
Subject: [PATCH 1/4] [ARM] Treat strictfp vector rounding operations as legal

Previously, the strictfp variants of rounding operations (FLOOR, ROUND,
etc.) were handled in SelectionDAG via the default expansion, which
splits a vector operation into scalar ones. This results in less
efficient code.

This change declares the strictfp counterparts of the vector rounding
operations as legal and modifies existing rules in tablegen descriptions
accordingly.
---
 llvm/lib/Target/ARM/ARMISelLowering.cpp       | 20 ++++++++
 llvm/lib/Target/ARM/ARMInstrMVE.td            | 12 ++---
 llvm/lib/Target/ARM/ARMInstrNEON.td           | 12 ++---
 .../CodeGen/ARM/fp-intrinsics-vector-v8.ll    | 50 +++++++++----------
 4 files changed, 57 insertions(+), 37 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp 
b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index b6a66d68fe5f2..86fbb1460f492 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -379,6 +379,13 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
       setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
 
+      setOperationAction(ISD::STRICT_FROUND, VT, Legal);
+      setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
+      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+      setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+      setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+      setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+
       // No native support for these.
       setOperationAction(ISD::FDIV, VT, Expand);
       setOperationAction(ISD::FREM, VT, Expand);
@@ -1356,6 +1363,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine 
&TM_,
       setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
       setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
       setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+
+      setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f32, Legal);
+      setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FROUND, MVT::v2f32, Legal);
+      setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::v2f32, Legal);
+      setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FCEIL, MVT::v2f32, Legal);
+      setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f32, Legal);
+      setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
+      setOperationAction(ISD::STRICT_FRINT, MVT::v2f32, Legal);
+      setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
     }
 
     if (Subtarget->hasFullFP16()) {
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td 
b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 1e9c141f13f83..8f874fed991f1 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3605,12 +3605,12 @@ multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string 
suffix, bits<3> opcode,
 }
 
 multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> {
-  defm N : MVE_VRINT_m<VTI, "n", 0b000, froundeven>;
-  defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;
-  defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;
-  defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;
-  defm M : MVE_VRINT_m<VTI, "m", 0b101, ffloor>;
-  defm P : MVE_VRINT_m<VTI, "p", 0b111, fceil>;
+  defm N : MVE_VRINT_m<VTI, "n", 0b000, any_froundeven>;
+  defm X : MVE_VRINT_m<VTI, "x", 0b001, any_frint>;
+  defm A : MVE_VRINT_m<VTI, "a", 0b010, any_fround>;
+  defm Z : MVE_VRINT_m<VTI, "z", 0b011, any_ftrunc>;
+  defm M : MVE_VRINT_m<VTI, "m", 0b101, any_ffloor>;
+  defm P : MVE_VRINT_m<VTI, "p", 0b111, any_fceil>;
 }
 
 defm MVE_VRINTf16 : MVE_VRINT_ops<MVE_v8f16>;
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td 
b/llvm/lib/Target/ARM/ARMInstrNEON.td
index e2450490c83b8..f31938bb43d16 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -7349,12 +7349,12 @@ multiclass VRINT_FPI<string op, bits<3> op9_7, 
SDPatternOperator Int> {
   }
 }
 
-defm VRINTNN : VRINT_FPI<"n", 0b000, froundeven>;
-defm VRINTXN : VRINT_FPI<"x", 0b001, frint>;
-defm VRINTAN : VRINT_FPI<"a", 0b010, fround>;
-defm VRINTZN : VRINT_FPI<"z", 0b011, ftrunc>;
-defm VRINTMN : VRINT_FPI<"m", 0b101, ffloor>;
-defm VRINTPN : VRINT_FPI<"p", 0b111, fceil>;
+defm VRINTNN : VRINT_FPI<"n", 0b000, any_froundeven>;
+defm VRINTXN : VRINT_FPI<"x", 0b001, any_frint>;
+defm VRINTAN : VRINT_FPI<"a", 0b010, any_fround>;
+defm VRINTZN : VRINT_FPI<"z", 0b011, any_ftrunc>;
+defm VRINTMN : VRINT_FPI<"m", 0b101, any_ffloor>;
+defm VRINTPN : VRINT_FPI<"p", 0b111, any_fceil>;
 
 // Cryptography instructions
 let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll 
b/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll
index 4840da192c09e..3014da24aa727 100644
--- a/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll
+++ b/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll
@@ -17,11 +17,11 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %x) #0 {
 define <4 x float> @rint_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: rint_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vrintx.f32 s7, s3
-; CHECK-NEXT:    vrintx.f32 s6, s2
-; CHECK-NEXT:    vrintx.f32 s5, s1
-; CHECK-NEXT:    vrintx.f32 s4, s0
-; CHECK-NEXT:    vorr q0, q1, q1
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vrintx.f32 q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x 
float> %x, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x float> %val
@@ -30,11 +30,11 @@ define <4 x float> @rint_v4f32(<4 x float> %x) #0 {
 define <4 x float> @round_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: round_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vrinta.f32 s7, s3
-; CHECK-NEXT:    vrinta.f32 s6, s2
-; CHECK-NEXT:    vrinta.f32 s5, s1
-; CHECK-NEXT:    vrinta.f32 s4, s0
-; CHECK-NEXT:    vorr q0, q1, q1
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vrinta.f32 q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x 
float> %x, metadata !"fpexcept.strict")
   ret <4 x float> %val
@@ -43,11 +43,11 @@ define <4 x float> @round_v4f32(<4 x float> %x) #0 {
 define <4 x float> @roundeven_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: roundeven_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vrintn.f32 s7, s3
-; CHECK-NEXT:    vrintn.f32 s6, s2
-; CHECK-NEXT:    vrintn.f32 s5, s1
-; CHECK-NEXT:    vrintn.f32 s4, s0
-; CHECK-NEXT:    vorr q0, q1, q1
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vrintn.f32 q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x 
float> %x, metadata !"fpexcept.strict")
   ret <4 x float> %val
@@ -56,11 +56,11 @@ define <4 x float> @roundeven_v4f32(<4 x float> %x) #0 {
 define <4 x float> @floor_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: floor_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vrintm.f32 s7, s3
-; CHECK-NEXT:    vrintm.f32 s6, s2
-; CHECK-NEXT:    vrintm.f32 s5, s1
-; CHECK-NEXT:    vrintm.f32 s4, s0
-; CHECK-NEXT:    vorr q0, q1, q1
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vrintm.f32 q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x 
float> %x, metadata !"fpexcept.strict")
   ret <4 x float> %val
@@ -69,11 +69,11 @@ define <4 x float> @floor_v4f32(<4 x float> %x) #0 {
 define <4 x float> @ceil_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: ceil_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vrintp.f32 s7, s3
-; CHECK-NEXT:    vrintp.f32 s6, s2
-; CHECK-NEXT:    vrintp.f32 s5, s1
-; CHECK-NEXT:    vrintp.f32 s4, s0
-; CHECK-NEXT:    vorr q0, q1, q1
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d16, r0, r1
+; CHECK-NEXT:    vrintp.f32 q8, q8
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x 
float> %x, metadata !"fpexcept.strict")
   ret <4 x float> %val

>From 07166da44d1bc03ccc6830124192a8fe30b60547 Mon Sep 17 00:00:00 2001
From: Serge Pavlov <[email protected]>
Date: Mon, 9 Feb 2026 17:41:15 +0700
Subject: [PATCH 2/4] Address review comments

---
 llvm/.clang-format                      |  2 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp | 51 +++++++------------------
 2 files changed, 14 insertions(+), 39 deletions(-)

diff --git a/llvm/.clang-format b/llvm/.clang-format
index 4528c41de9546..1541b796e3ff3 100644
--- a/llvm/.clang-format
+++ b/llvm/.clang-format
@@ -1,5 +1,5 @@
 BasedOnStyle: LLVM
-LineEnding: LF
+#LineEnding: LF
 ---
 # Don't format .td files
 Language: TableGen
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp 
b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 86fbb1460f492..594beb4f290f8 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -368,24 +368,17 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
     if (HasMVEFP) {
       setOperationAction(ISD::FMINNUM, VT, Legal);
       setOperationAction(ISD::FMAXNUM, VT, Legal);
-      setOperationAction(ISD::FROUND, VT, Legal);
-      setOperationAction(ISD::FROUNDEVEN, VT, Legal);
-      setOperationAction(ISD::FRINT, VT, Legal);
-      setOperationAction(ISD::FTRUNC, VT, Legal);
-      setOperationAction(ISD::FFLOOR, VT, Legal);
-      setOperationAction(ISD::FCEIL, VT, Legal);
+      for (auto Op : {ISD::FROUND, ISD::STRICT_FROUND, ISD::FROUNDEVEN,
+                      ISD::STRICT_FROUNDEVEN, ISD::FTRUNC, ISD::STRICT_FTRUNC,
+                      ISD::FRINT, ISD::STRICT_FRINT, ISD::FFLOOR,
+                      ISD::STRICT_FFLOOR, ISD::FCEIL, ISD::STRICT_FCEIL}) {
+        setOperationAction(Op, VT, Legal);
+      }
       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
       setOperationAction(ISD::VECREDUCE_FMUL, VT, Custom);
       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
       setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
 
-      setOperationAction(ISD::STRICT_FROUND, VT, Legal);
-      setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
-      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
-      setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
-      setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
-      setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
-
       // No native support for these.
       setOperationAction(ISD::FDIV, VT, Expand);
       setOperationAction(ISD::FREM, VT, Expand);
@@ -1351,31 +1344,13 @@ ARMTargetLowering::ARMTargetLowering(const 
TargetMachine &TM_,
     setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
 
     if (Subtarget->hasV8Ops()) {
-      setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
-      setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
-      setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
-      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
-      setOperationAction(ISD::FROUNDEVEN, MVT::v2f32, Legal);
-      setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal);
-      setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
-      setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
-      setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
-      setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
-      setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
-      setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
-
-      setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f32, Legal);
-      setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
-      setOperationAction(ISD::STRICT_FROUND, MVT::v2f32, Legal);
-      setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
-      setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::v2f32, Legal);
-      setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::v4f32, Legal);
-      setOperationAction(ISD::STRICT_FCEIL, MVT::v2f32, Legal);
-      setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
-      setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f32, Legal);
-      setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
-      setOperationAction(ISD::STRICT_FRINT, MVT::v2f32, Legal);
-      setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
+      for (auto Op : {ISD::FROUND, ISD::STRICT_FROUND, ISD::FROUNDEVEN,
+                      ISD::STRICT_FROUNDEVEN, ISD::FTRUNC, ISD::STRICT_FTRUNC,
+                      ISD::FRINT, ISD::STRICT_FRINT, ISD::FFLOOR,
+                      ISD::STRICT_FFLOOR, ISD::FCEIL, ISD::STRICT_FCEIL}) {
+        setOperationAction(Op, MVT::v2f32, Legal);
+        setOperationAction(Op, MVT::v4f32, Legal);
+      }
     }
 
     if (Subtarget->hasFullFP16()) {

>From ed96199eb8ea1cbad598b3979befd0f368f2cefa Mon Sep 17 00:00:00 2001
From: Serge Pavlov <[email protected]>
Date: Mon, 9 Feb 2026 23:21:48 +0700
Subject: [PATCH 3/4] Remove stray change

---
 llvm/.clang-format | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/.clang-format b/llvm/.clang-format
index 1541b796e3ff3..4528c41de9546 100644
--- a/llvm/.clang-format
+++ b/llvm/.clang-format
@@ -1,5 +1,5 @@
 BasedOnStyle: LLVM
-#LineEnding: LF
+LineEnding: LF
 ---
 # Don't format .td files
 Language: TableGen

>From 378532db2a6283a2a3c836875f2fe68f697d540b Mon Sep 17 00:00:00 2001
From: Serge Pavlov <[email protected]>
Date: Tue, 10 Feb 2026 18:37:26 +0700
Subject: [PATCH 4/4] Update because dependency is changed

---
 .../CodeGen/ARM/fp-intrinsics-vector-v8.ll    |  30 +-----
 .../Thumb2/mve-intrinsics/strict-round.ll     | 102 ++----------------
 2 files changed, 11 insertions(+), 121 deletions(-)

diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll 
b/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll
index 3014da24aa727..cb9fcc6eca77b 100644
--- a/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll
+++ b/llvm/test/CodeGen/ARM/fp-intrinsics-vector-v8.ll
@@ -17,11 +17,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %x) #0 {
 define <4 x float> @rint_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: rint_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov d17, r2, r3
-; CHECK-NEXT:    vmov d16, r0, r1
-; CHECK-NEXT:    vrintx.f32 q8, q8
-; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    vrintx.f32 q0, q0
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x 
float> %x, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x float> %val
@@ -30,11 +26,7 @@ define <4 x float> @rint_v4f32(<4 x float> %x) #0 {
 define <4 x float> @round_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: round_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov d17, r2, r3
-; CHECK-NEXT:    vmov d16, r0, r1
-; CHECK-NEXT:    vrinta.f32 q8, q8
-; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    vrinta.f32 q0, q0
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x 
float> %x, metadata !"fpexcept.strict")
   ret <4 x float> %val
@@ -43,11 +35,7 @@ define <4 x float> @round_v4f32(<4 x float> %x) #0 {
 define <4 x float> @roundeven_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: roundeven_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov d17, r2, r3
-; CHECK-NEXT:    vmov d16, r0, r1
-; CHECK-NEXT:    vrintn.f32 q8, q8
-; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    vrintn.f32 q0, q0
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x 
float> %x, metadata !"fpexcept.strict")
   ret <4 x float> %val
@@ -56,11 +44,7 @@ define <4 x float> @roundeven_v4f32(<4 x float> %x) #0 {
 define <4 x float> @floor_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: floor_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov d17, r2, r3
-; CHECK-NEXT:    vmov d16, r0, r1
-; CHECK-NEXT:    vrintm.f32 q8, q8
-; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    vrintm.f32 q0, q0
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x 
float> %x, metadata !"fpexcept.strict")
   ret <4 x float> %val
@@ -69,11 +53,7 @@ define <4 x float> @floor_v4f32(<4 x float> %x) #0 {
 define <4 x float> @ceil_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: ceil_v4f32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vmov d17, r2, r3
-; CHECK-NEXT:    vmov d16, r0, r1
-; CHECK-NEXT:    vrintp.f32 q8, q8
-; CHECK-NEXT:    vmov r0, r1, d16
-; CHECK-NEXT:    vmov r2, r3, d17
+; CHECK-NEXT:    vrintp.f32 q0, q0
 ; CHECK-NEXT:    bx lr
   %val = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x 
float> %x, metadata !"fpexcept.strict")
   ret <4 x float> %val
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-round.ll 
b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-round.ll
index 2ca7cd41f98c2..33b68a5b4e61c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-round.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/strict-round.ll
@@ -4,22 +4,7 @@
 define arm_aapcs_vfpcc <8 x half> @test_rint_f16(<8 x half> %a) #0 {
 ; CHECK-LABEL: test_rint_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmovx.f16 s4, s0
-; CHECK-NEXT:    vrintx.f16 s0, s0
-; CHECK-NEXT:    vrintx.f16 s4, s4
-; CHECK-NEXT:    vins.f16 s0, s4
-; CHECK-NEXT:    vmovx.f16 s4, s1
-; CHECK-NEXT:    vrintx.f16 s4, s4
-; CHECK-NEXT:    vrintx.f16 s1, s1
-; CHECK-NEXT:    vins.f16 s1, s4
-; CHECK-NEXT:    vmovx.f16 s4, s2
-; CHECK-NEXT:    vrintx.f16 s4, s4
-; CHECK-NEXT:    vrintx.f16 s2, s2
-; CHECK-NEXT:    vins.f16 s2, s4
-; CHECK-NEXT:    vmovx.f16 s4, s3
-; CHECK-NEXT:    vrintx.f16 s4, s4
-; CHECK-NEXT:    vrintx.f16 s3, s3
-; CHECK-NEXT:    vins.f16 s3, s4
+; CHECK-NEXT:    vrintx.f16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = tail call <8 x half> @llvm.experimental.constrained.rint.v8f16(<8 x 
half> %a, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -29,22 +14,7 @@ entry:
 define arm_aapcs_vfpcc <8 x half> @test_roundeven_f16(<8 x half> %a) #0 {
 ; CHECK-LABEL: test_roundeven_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmovx.f16 s4, s0
-; CHECK-NEXT:    vrintn.f16 s0, s0
-; CHECK-NEXT:    vrintn.f16 s4, s4
-; CHECK-NEXT:    vins.f16 s0, s4
-; CHECK-NEXT:    vmovx.f16 s4, s1
-; CHECK-NEXT:    vrintn.f16 s4, s4
-; CHECK-NEXT:    vrintn.f16 s1, s1
-; CHECK-NEXT:    vins.f16 s1, s4
-; CHECK-NEXT:    vmovx.f16 s4, s2
-; CHECK-NEXT:    vrintn.f16 s4, s4
-; CHECK-NEXT:    vrintn.f16 s2, s2
-; CHECK-NEXT:    vins.f16 s2, s4
-; CHECK-NEXT:    vmovx.f16 s4, s3
-; CHECK-NEXT:    vrintn.f16 s4, s4
-; CHECK-NEXT:    vrintn.f16 s3, s3
-; CHECK-NEXT:    vins.f16 s3, s4
+; CHECK-NEXT:    vrintn.f16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = tail call <8 x half> @llvm.experimental.constrained.roundeven.v8f16(<8 
x half> %a, metadata !"fpexcept.strict")
@@ -54,22 +24,7 @@ entry:
 define arm_aapcs_vfpcc <8 x half> @test_round_f16(<8 x half> %a) #0 {
 ; CHECK-LABEL: test_round_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmovx.f16 s4, s0
-; CHECK-NEXT:    vrinta.f16 s0, s0
-; CHECK-NEXT:    vrinta.f16 s4, s4
-; CHECK-NEXT:    vins.f16 s0, s4
-; CHECK-NEXT:    vmovx.f16 s4, s1
-; CHECK-NEXT:    vrinta.f16 s4, s4
-; CHECK-NEXT:    vrinta.f16 s1, s1
-; CHECK-NEXT:    vins.f16 s1, s4
-; CHECK-NEXT:    vmovx.f16 s4, s2
-; CHECK-NEXT:    vrinta.f16 s4, s4
-; CHECK-NEXT:    vrinta.f16 s2, s2
-; CHECK-NEXT:    vins.f16 s2, s4
-; CHECK-NEXT:    vmovx.f16 s4, s3
-; CHECK-NEXT:    vrinta.f16 s4, s4
-; CHECK-NEXT:    vrinta.f16 s3, s3
-; CHECK-NEXT:    vins.f16 s3, s4
+; CHECK-NEXT:    vrinta.f16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = tail call <8 x half> @llvm.experimental.constrained.round.v8f16(<8 x 
half> %a, metadata !"fpexcept.strict")
@@ -79,22 +34,7 @@ entry:
 define arm_aapcs_vfpcc <8 x half> @test_trunc_f16(<8 x half> %a) #0 {
 ; CHECK-LABEL: test_trunc_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmovx.f16 s4, s0
-; CHECK-NEXT:    vrintz.f16 s0, s0
-; CHECK-NEXT:    vrintz.f16 s4, s4
-; CHECK-NEXT:    vins.f16 s0, s4
-; CHECK-NEXT:    vmovx.f16 s4, s1
-; CHECK-NEXT:    vrintz.f16 s4, s4
-; CHECK-NEXT:    vrintz.f16 s1, s1
-; CHECK-NEXT:    vins.f16 s1, s4
-; CHECK-NEXT:    vmovx.f16 s4, s2
-; CHECK-NEXT:    vrintz.f16 s4, s4
-; CHECK-NEXT:    vrintz.f16 s2, s2
-; CHECK-NEXT:    vins.f16 s2, s4
-; CHECK-NEXT:    vmovx.f16 s4, s3
-; CHECK-NEXT:    vrintz.f16 s4, s4
-; CHECK-NEXT:    vrintz.f16 s3, s3
-; CHECK-NEXT:    vins.f16 s3, s4
+; CHECK-NEXT:    vrintz.f16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = tail call <8 x half> @llvm.experimental.constrained.trunc.v8f16(<8 x 
half> %a, metadata !"fpexcept.strict")
@@ -104,22 +44,7 @@ entry:
 define arm_aapcs_vfpcc <8 x half> @test_floor_f16(<8 x half> %a) #0 {
 ; CHECK-LABEL: test_floor_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmovx.f16 s4, s0
-; CHECK-NEXT:    vrintm.f16 s0, s0
-; CHECK-NEXT:    vrintm.f16 s4, s4
-; CHECK-NEXT:    vins.f16 s0, s4
-; CHECK-NEXT:    vmovx.f16 s4, s1
-; CHECK-NEXT:    vrintm.f16 s4, s4
-; CHECK-NEXT:    vrintm.f16 s1, s1
-; CHECK-NEXT:    vins.f16 s1, s4
-; CHECK-NEXT:    vmovx.f16 s4, s2
-; CHECK-NEXT:    vrintm.f16 s4, s4
-; CHECK-NEXT:    vrintm.f16 s2, s2
-; CHECK-NEXT:    vins.f16 s2, s4
-; CHECK-NEXT:    vmovx.f16 s4, s3
-; CHECK-NEXT:    vrintm.f16 s4, s4
-; CHECK-NEXT:    vrintm.f16 s3, s3
-; CHECK-NEXT:    vins.f16 s3, s4
+; CHECK-NEXT:    vrintm.f16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = tail call <8 x half> @llvm.experimental.constrained.floor.v8f16(<8 x 
half> %a, metadata !"fpexcept.strict")
@@ -129,22 +54,7 @@ entry:
 define arm_aapcs_vfpcc <8 x half> @test_ceil_f16(<8 x half> %a) #0 {
 ; CHECK-LABEL: test_ceil_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmovx.f16 s4, s0
-; CHECK-NEXT:    vrintp.f16 s0, s0
-; CHECK-NEXT:    vrintp.f16 s4, s4
-; CHECK-NEXT:    vins.f16 s0, s4
-; CHECK-NEXT:    vmovx.f16 s4, s1
-; CHECK-NEXT:    vrintp.f16 s4, s4
-; CHECK-NEXT:    vrintp.f16 s1, s1
-; CHECK-NEXT:    vins.f16 s1, s4
-; CHECK-NEXT:    vmovx.f16 s4, s2
-; CHECK-NEXT:    vrintp.f16 s4, s4
-; CHECK-NEXT:    vrintp.f16 s2, s2
-; CHECK-NEXT:    vins.f16 s2, s4
-; CHECK-NEXT:    vmovx.f16 s4, s3
-; CHECK-NEXT:    vrintp.f16 s4, s4
-; CHECK-NEXT:    vrintp.f16 s3, s3
-; CHECK-NEXT:    vins.f16 s3, s4
+; CHECK-NEXT:    vrintp.f16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = tail call <8 x half> @llvm.experimental.constrained.ceil.v8f16(<8 x 
half> %a, metadata !"fpexcept.strict")

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to