[llvm-branch-commits] [compiler-rt] [compiler-rt][ARM] Optimized single-precision FP comparisons (PR #179925)

Simon Tatham via llvm-branch-commits Thu, 05 Feb 2026 04:03:17 -0800

https://github.com/statham-arm created 
https://github.com/llvm/llvm-project/pull/179925


These comparison functions follow the same structure as the double-precision 
ones in a prior commit: a header file containing the main logic, plus some 
entry points that vary the construction of the return value.

In this case, we have provided versions for Thumb1 as well as Arm/Thumb2.

>From 017a0e3bac60a714ef6923eead584a91eebbf2a1 Mon Sep 17 00:00:00 2001
From: Simon Tatham <[email protected]>
Date: Thu, 29 Jan 2026 16:10:11 +0000
Subject: [PATCH] [compiler-rt][ARM] Optimized single-precision FP comparisons

These comparison functions follow the same structure as the
double-precision ones in a prior commit, of a header file containing
the main logic and some entry points varying the construction of the
return value.

In this case, we have provided versions for Thumb1 as well as
Arm/Thumb2.
---
 compiler-rt/lib/builtins/CMakeLists.txt       |   9 +
 compiler-rt/lib/builtins/arm/cmpsf2.S         |  56 +++
 compiler-rt/lib/builtins/arm/fcmp.h           | 174 +++++++
 compiler-rt/lib/builtins/arm/gesf2.S          |  54 +++
 compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S  |  55 +++
 compiler-rt/lib/builtins/arm/thumb1/fcmp.h    | 191 ++++++++
 compiler-rt/lib/builtins/arm/thumb1/gesf2.S   |  54 +++
 .../lib/builtins/arm/thumb1/unordsf2.S        |  49 ++
 compiler-rt/lib/builtins/arm/unordsf2.S       |  56 +++
 .../test/builtins/Unit/comparesf2new_test.c   | 433 ++++++++++++++++++
 10 files changed, 1131 insertions(+)
 create mode 100644 compiler-rt/lib/builtins/arm/cmpsf2.S
 create mode 100644 compiler-rt/lib/builtins/arm/fcmp.h
 create mode 100644 compiler-rt/lib/builtins/arm/gesf2.S
 create mode 100644 compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S
 create mode 100644 compiler-rt/lib/builtins/arm/thumb1/fcmp.h
 create mode 100644 compiler-rt/lib/builtins/arm/thumb1/gesf2.S
 create mode 100644 compiler-rt/lib/builtins/arm/thumb1/unordsf2.S
 create mode 100644 compiler-rt/lib/builtins/arm/unordsf2.S
 create mode 100644 compiler-rt/test/builtins/Unit/comparesf2new_test.c

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt 
b/compiler-rt/lib/builtins/CMakeLists.txt
index 0e8b0fa553442..0c53781a51392 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -451,8 +451,11 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH 
MATCHES "arm")
       arm/muldf3.S
       arm/divdf3.S
       arm/cmpdf2.S
+      arm/cmpsf2.S
       arm/gedf2.S
+      arm/gesf2.S
       arm/unorddf2.S
+      arm/unordsf2.S
       )
     set_source_files_properties(${assembly_files}
       PROPERTIES COMPILE_OPTIONS ${implicit_it_flag})
@@ -507,8 +510,11 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP)
   set(thumb1_base_SOURCES
     arm/thumb1/mulsf3.S
     arm/thumb1/cmpdf2.S
+    arm/thumb1/cmpsf2.S
     arm/thumb1/gedf2.S
+    arm/thumb1/gesf2.S
     arm/thumb1/unorddf2.S
+    arm/thumb1/unordsf2.S
     arm/fnan2.c
     arm/fnorm2.c
     arm/funder.c
@@ -516,6 +522,9 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP)
   )
   set_property(SOURCE arm/thumb1/cmpdf2.S PROPERTY crt_supersedes comparedf2.c)
   set_property(SOURCE arm/thumb1/cmpdf2.S DIRECTORY ${COMPILER_RT_SOURCE_DIR} 
PROPERTY crt_provides comparedf2)
+  set_property(SOURCE arm/thumb1/cmpsf2.S PROPERTY crt_supersedes comparesf2.S)
+  # We don't need to set 'crt_provides' for cmpsf2.S, because the
+  # superseded comparesf2.S will already have enabled the comparesf2 tests.
 endif()
 
 set(arm_EABI_RT_SOURCES
diff --git a/compiler-rt/lib/builtins/arm/cmpsf2.S 
b/compiler-rt/lib/builtins/arm/cmpsf2.S
new file mode 100644
index 0000000000000..14166246101af
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/cmpsf2.S
@@ -0,0 +1,56 @@
+//===-- cmpsf2.S - single-precision floating point comparison 
-------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This function has the semantics of GNU __cmpsf2: it's a three-way compare
+// which returns <0 if x<y, 0 if x==y, and >0 if x>y. If the result is
+// unordered (i.e. x or y or both is NaN) then it returns >0.
+//
+// This also makes it suitable for use as all of __eqsf2, __nesf2, __ltsf2 or
+// __lesf2.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+  .syntax unified
+  .text
+  .p2align 2
+
+op0 .req r0
+op1 .req r1
+.macro SetReturnRegister
+  mov r0, #0
+  movhi r0, #1
+  movlo r0, #-1
+.endm
+
+#if __ARM_PCS_VFP
+DEFINE_COMPILERRT_FUNCTION(__cmpsf2)
+  push {r4, lr}
+  vmov r0, s0
+  vmov r1, s1
+  bl __compiler_rt_softfp_cmpsf2
+  pop {r4, pc}
+#else
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__cmpsf2, __compiler_rt_softfp_cmpsf2)
+#endif
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__eqsf2, __cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __cmpsf2)
+
+DEFINE_COMPILERRT_FUNCTION(__compiler_rt_softfp_cmpsf2)
+  #include "fcmp.h"
+
+LOCAL_LABEL(NaN):
+  mov r0, #+1
+  bx lr
+
+END_COMPILERRT_FUNCTION(__compiler_rt_softfp_cmpsf2)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/lib/builtins/arm/fcmp.h 
b/compiler-rt/lib/builtins/arm/fcmp.h
new file mode 100644
index 0000000000000..23bdd73a10c5b
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/fcmp.h
@@ -0,0 +1,174 @@
+//===-- fcmp.h - shared code for single-precision FP comparison functions 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This code is the skeleton of a single-precision FP compare, with two details
+// left out: which input value is in which register, and how to make the return
+// value. It allows the main comparison logic to be shared between (for
+// example) __lesf2 and __gesf2, varying only those details.
+//
+//===----------------------------------------------------------------------===//
+
+// How to use this header file:
+//
+// This header file is expected to be #included from inside a function
+// definition in a .S file. The source file including this header should
+// provide the following:
+//
+// op0 and op1: register aliases (via .req) for the registers containing the
+// input operands.
+//  - For most comparisons, op0 will correspond to r0 and op1 to r1.
+//  - But a function with the reversed semantics of __aeabi_cfrcmple will define
+//    them the other way round.
+//
+// SetReturnRegister: an assembly macro that looks at the PSR flags and sets up
+// an appropriate return value in r0, for the cases that do *not* involve NaN.
+//  - On entry to this macro, the condition codes LO, EQ and HI indicate that
+//    op0 < op1, op0 == op1 or op0 > op1 respectively.
+//  - For functions that return a result in the flags, this macro can be empty,
+//    because those are the correct flags to return anyway.
+//  - Functions that return a boolean in r0 should set it up by checking the
+//    flags.
+//
+// LOCAL_LABEL(NaN): a label defined within the compare function, after the
+// #include of this header. Called when at least one input is a NaN, and sets
+// up the appropriate return value for that case.
+
+// --------------------------------------------------
+// The actual entry point of the compare function.
+//
+// The basic plan is to start by ORing together the two inputs. This tells us
+// two things:
+//  - the top bit of the output tells us whether both inputs are positive, or
+//    whether at least one is negative
+//  - if the 8 exponent bits of the output are not all 1, then there are
+//    definitely no NaNs, so a fast path can handle most non-NaN cases.
+
+  // First diverge control for the negative-numbers case.
+  orrs    r12, op0, op1
+  bmi     LOCAL_LABEL(negative)         // high bit set => at least one 
negative input
+
+  // Here, both inputs are positive. Try adding 1<<23 to their bitwise OR in
+  // r12. This will carry all the way into the top bit, setting the N flag, if
+  // all 8 exponent bits were set.
+  cmn     r12, #1 << 23
+  bmi     LOCAL_LABEL(NaNInf_check_positive) // need to look harder for NaNs
+
+  // The fastest fast path: both inputs positive and we could easily tell there
+  // were no NaNs. So we just compare op0 and op1 as unsigned integers.
+  cmp     op0, op1
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(NaNInf_check_positive):
+  // Second tier for positive numbers. We come here if both inputs are
+  // positive, but our fast initial check didn't manage to rule out a NaN. But
+  // it's not guaranteed that there _is_ a NaN, for two reasons:
+  //
+  //  1. An input with exponent 0xFF might be an infinity instead. Those behave
+  //    normally under comparison.
+  //
+  //  2. There might not even _be_ an input with exponent 0xFF. All we know so
+  //     far is that the two inputs ORed together had all the exponent bits
+  //     set. So each of those bits is set in _at least one_ of the inputs, but
+  //     not necessarily all in the _same_ input.
+  //
+  // Test each exponent individually for 0xFF, using the same CMN idiom as
+  // above. If neither one carries into the sign bit then we have no NaNs _or_
+  // infinities and can compare the registers and return again.
+  cmn     op0, #1 << 23
+  cmnpl   op1, #1 << 23
+  bmi     LOCAL_LABEL(NaN_check_positive)
+
+  // Second-tier return path, now we've ruled out anything difficult.
+  cmp     op0, op1
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(NaN_check_positive):
+  // Third tier for positive numbers. Here we know that at least one of the
+  // inputs has exponent 0xFF. But they might still be infinities rather than
+  // NaNs. So now we must check whether there's an actual NaN, by shifting each
+  // input left to get rid of the sign bit, and seeing if the result is
+  // _greater_ than 0xFF000000 (but not equal).
+  //
+  // We could have skipped the second-tier check and done this more rigorous
+  // test immediately. But that would cost an extra instruction in the case
+  // where there are no infinities or NaNs, and we assume that that is so much
+  // more common that it's worth optimizing for.
+  mov     r12, #0xFF << 24
+  cmp     r12, op0, LSL #1   // if LO, then r12 < (op0 << 1), so op0 is a NaN
+  cmphs   r12, op1, LSL #1   // if not LO, then do the same check for op1
+  blo     LOCAL_LABEL(NaN)           // now, if LO, there's definitely a NaN
+
+  // Now we've finally ruled out NaNs! And we still know both inputs are
+  // positive. So the third-tier return path can just compare the numbers
+  // again.
+  cmp     op0, op1
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(negative):
+  // We come here if at least one operand is negative. We haven't checked for
+  // NaNs at all yet (the sign check came first), so repeat the first-tier
+  // check strategy of seeing if all exponent bits are set in r12.
+  //
+  // On this path, the sign bit in r12 is set, so if adding 1 to the low
+  // exponent bit carries all the way through into the sign bit, it will
+  // _clear_ the sign bit rather than setting it. So we expect MI to be the
+  // "definitely no NaNs" result, where it was PL on the positive branch.
+  cmn     r12, #1 << 23
+  bpl     LOCAL_LABEL(NaNInf_check_negative)
+
+  // Now we have no NaNs, but at least one negative number. This gives us two
+  // complications:
+  //
+  //  1. Floating-point numbers are sign/magnitude, not two's complement, so we
+  //     have to consider separately the cases of "both negative" and "one of
+  //     each sign".
+  //
+  //  2. -0 and +0 are required to compare equal.
+  //
+  // But problem #1 is not as hard as it sounds! If both operands are negative,
+  // then we can get the result we want by comparing them as unsigned integers
+  // the opposite way round, because the input with the smaller value (as an
+  // integer) is the larger number in an FP ordering sense. And if one operand
+  // is negative and the other is positive, the _same_ reversed comparison
+  // works, because the positive number (with zero sign bit) will always
+  // compare less than the negative one in an unsigned-integers sense.
+  //
+  // So we only have to worry about problem #2, signed zeroes. This only
+  // affects the answer if _both_ operands are zero. And we can check that
+  // easily, because it happens if and only if r12 = 0x80000000. (We know r12
+  // has its sign bit set; if it has no other bits set, that's because both
+  // inputs were either 0x80000000 or 0x00000000.)
+  cmp     r12, #0x80000000        // EQ if both inputs are zero
+  cmpne   op1, op0                // otherwise, compare them backwards
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(NaNInf_check_negative):
+  // Second tier for negative numbers: we know the OR of the exponents is 0xFF,
+  // but again, we might not have either _actual_ exponent 0xFF, and also, an
+  // exponent 0xFF might be an infinity instead of a NaN.
+  //
+  // On this path we've already branched twice (once for negative numbers and
+  // once for the first-tier NaN check), so we'll just go straight to the
+  // precise check for NaNs.
+  mov     r12, #0xFF << 24
+  cmp     r12, op0, LSL #1   // if LO, then r12 < (op0 << 1), so op0 is a NaN
+  cmphs   r12, op1, LSL #1   // if not LO, then do the same check for op1
+  blo     LOCAL_LABEL(NaN)
+
+  // Now we've ruled out NaNs, so we can just compare the two input registers
+  // and return. On this path we _don't_ need to check for the special case of
+  // comparing two zeroes, because we only came here if the bitwise OR of the
+  // exponent fields was 0xFF, which means the exponents can't both have been
+  // zero! So we can _just_ do the reversed CMP and finish.
+  cmp     op1, op0
+  SetReturnRegister
+  bx      lr
diff --git a/compiler-rt/lib/builtins/arm/gesf2.S 
b/compiler-rt/lib/builtins/arm/gesf2.S
new file mode 100644
index 0000000000000..c149eea589f05
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/gesf2.S
@@ -0,0 +1,54 @@
+//===-- gesf2.S - single-precision floating point comparison 
--------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This function has the semantics of GNU __cmpsf2, except for its NaN
+// handling. It's a three-way compare which returns <0 if x<y, 0 if x==y, and
+// >0 if x>y. If the result is unordered (i.e. x or y or both is NaN) then it
+// returns <0, where __cmpsf2 would return >0.
+//
+// This also makes it suitable for use as __gtsf2 or __gesf2 (or __eqsf2 or
+// __nesf2).
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+  .syntax unified
+  .text
+  .p2align 2
+
+op0 .req r0
+op1 .req r1
+.macro SetReturnRegister
+  mov r0, #0
+  movhi r0, #1
+  movlo r0, #-1
+.endm
+
+#if __ARM_PCS_VFP
+DEFINE_COMPILERRT_FUNCTION(__gesf2)
+  push {r4, lr}
+  vmov r0, s0
+  vmov r1, s1
+  bl __compiler_rt_softfp_gesf2
+  pop {r4, pc}
+#else
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __compiler_rt_softfp_gesf2)
+#endif
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__gtsf2, __gesf2)
+
+DEFINE_COMPILERRT_FUNCTION(__compiler_rt_softfp_gesf2)
+  #include "fcmp.h"
+
+LOCAL_LABEL(NaN):
+  mov r0, #-1
+  bx lr
+
+END_COMPILERRT_FUNCTION(__compiler_rt_softfp_gesf2)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S 
b/compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S
new file mode 100644
index 0000000000000..c8611d1147366
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S
@@ -0,0 +1,55 @@
+//===-- cmpsf2.S - single-precision floating point comparison 
-------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This function has the semantics of GNU __cmpsf2: it's a three-way compare
+// which returns <0 if x<y, 0 if x==y, and >0 if x>y. If the result is
+// unordered (i.e. x or y or both is NaN) then it returns >0.
+//
+// This also makes it suitable for use as all of __eqsf2, __nesf2, __ltsf2 or
+// __lesf2.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../assembly.h"
+
+  .syntax unified
+  .text
+  .p2align 2
+
+op0 .req r0
+op1 .req r1
+.macro SetReturnRegister
+  bhi 0f
+  blo 1f
+  movs r0, #0
+  bx lr
+0:
+  movs r0, #1
+  bx lr
+1:
+  movs r0, #1
+  rsbs r0, r0, #0
+  bx lr
+.endm
+
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__cmpsf2, __compiler_rt_softfp_cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__eqsf2, __cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __cmpsf2)
+
+DEFINE_COMPILERRT_THUMB_FUNCTION(__compiler_rt_softfp_cmpsf2)
+  #include "fcmp.h"
+
+LOCAL_LABEL(NaN):
+  movs r0, #1
+  bx lr
+
+END_COMPILERRT_FUNCTION(__compiler_rt_softfp_cmpsf2)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/lib/builtins/arm/thumb1/fcmp.h 
b/compiler-rt/lib/builtins/arm/thumb1/fcmp.h
new file mode 100644
index 0000000000000..bcfe928407e3c
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/thumb1/fcmp.h
@@ -0,0 +1,191 @@
+//===-- fcmp.h - shared code for single-precision FP comparison functions 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This code is the skeleton of a single-precision FP compare, with two details
+// left out: which input value is in which register, and how to make the return
+// value. It allows the main comparison logic to be shared between (for
+// example) __lesf2 and __gesf2, varying only those details.
+//
+//===----------------------------------------------------------------------===//
+
+// How to use this header file:
+//
+// This header file is expected to be #included from inside a function
+// definition in a .S file. The source file including this header should
+// provide the following:
+//
+// op0 and op1: register aliases (via .req) for the registers containing the
+// input operands.
+//  - For most comparisons, op0 will correspond to r0 and op1 to r1.
+//  - But a function with the reversed semantics of __aeabi_cfrcmple will define
+//    them the other way round.
+//
+// SetReturnRegister: an assembly macro that looks at the PSR flags and sets up
+// an appropriate return value in r0, for the cases that do *not* involve NaN.
+//  - On entry to this macro, the condition codes LO, EQ and HI indicate that
+//    op0 < op1, op0 == op1 or op0 > op1 respectively.
+//  - For functions that return a result in the flags, this macro can be empty,
+//    because those are the correct flags to return anyway.
+//  - Functions that return a boolean in r0 should set it up by checking the
+//    flags.
+//
+// LOCAL_LABEL(NaN): a label defined within the compare function, after the
+// #include of this header. Called when at least one input is a NaN, and sets
+// up the appropriate return value for that case.
+
+// --------------------------------------------------
+// The actual entry point of the compare function.
+//
+// The basic plan is to start by ORing together the two inputs. This tells us
+// two things:
+//  - the top bit of the output tells us whether both inputs are positive, or
+//    whether at least one is negative
+//  - if the 8 exponent bits of the output are not all 1, then there are
+//    definitely no NaNs, so a fast path can handle most non-NaN cases.
+
+  // Set up the constant 1 << 23 in a register, which we'll need on all
+  // branches.
+  movs    r3, #1
+  lsls    r3, r3, #23
+
+  // Diverge control for the negative-numbers case.
+  movs    r2, op0
+  orrs    r2, r2, op1
+  bmi     LOCAL_LABEL(negative)         // high bit set => at least one 
negative input
+
+  // Here, both inputs are positive. Try adding 1<<23 to their bitwise OR in
+  // r2. This will carry all the way into the top bit, setting the N flag, if
+  // all 8 exponent bits were set.
+  cmn     r2, r3
+  bmi     LOCAL_LABEL(NaNInf_check_positive) // need to look harder for NaNs
+
+  // The fastest fast path: both inputs positive and we could easily tell there
+  // were no NaNs. So we just compare op0 and op1 as unsigned integers.
+  cmp     op0, op1
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(NaNInf_check_positive):
+  // Second tier for positive numbers. We come here if both inputs are
+  // positive, but our fast initial check didn't manage to rule out a NaN. But
+  // it's not guaranteed that there _is_ a NaN, for two reasons:
+  //
+  //  1. An input with exponent 0xFF might be an infinity instead. Those behave
+  //    normally under comparison.
+  //
+  //  2. There might not even _be_ an input with exponent 0xFF. All we know so
+  //     far is that the two inputs ORed together had all the exponent bits
+  //     set. So each of those bits is set in _at least one_ of the inputs, but
+  //     not necessarily all in the _same_ input.
+  //
+  // Test each exponent individually for 0xFF, using the same CMN idiom as
+  // above. If neither one carries into the sign bit then we have no NaNs _or_
+  // infinities and can compare the registers and return again.
+  cmn     op0, r3
+  bmi     LOCAL_LABEL(NaN_check_positive)
+  cmn     op1, r3
+  bmi     LOCAL_LABEL(NaN_check_positive)
+
+  // Second-tier return path, now we've ruled out anything difficult.
+  cmp     op0, op1
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(NaN_check_positive):
+  // Third tier for positive numbers. Here we know that at least one of the
+  // inputs has exponent 0xFF. But they might still be infinities rather than
+  // NaNs. So now we must check whether there's an actual NaN, by shifting each
+  // input left to get rid of the sign bit, and seeing if the result is
+  // _greater_ than 0xFF000000 (but not equal).
+  //
+  // We could have skipped the second-tier check and done this more rigorous
+  // test immediately. But that would cost an extra instruction in the case
+  // where there are no infinities or NaNs, and we assume that that is so much
+  // more common that it's worth optimizing for.
+  movs    r2, #0xFF
+  lsls    r2, r2, #24
+  lsls    r3, op0, #1
+  cmp     r3, r2
+  bhi     LOCAL_LABEL(NaN)
+  lsls    r3, op1, #1
+  cmp     r3, r2
+  bhi     LOCAL_LABEL(NaN)
+
+  // Now we've finally ruled out NaNs! And we still know both inputs are
+  // positive. So the third-tier return path can just compare the numbers
+  // again.
+  cmp     op0, op1
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(negative):
+  // We come here if at least one operand is negative. We haven't checked for
+  // NaNs at all yet (the sign check came first), so repeat the first-tier
+  // check strategy of seeing if all exponent bits are set in r2.
+  //
+  // On this path, the sign bit in r2 is set, so if adding 1 to the low
+  // exponent bit carries all the way through into the sign bit, it will
+  // _clear_ the sign bit rather than setting it. So we expect MI to be the
+  // "definitely no NaNs" result, where it was PL on the positive branch.
+  cmn     r2, r3
+  bpl     LOCAL_LABEL(NaNInf_check_negative)
+
+  // Now we have no NaNs, but at least one negative number. This gives us two
+  // complications:
+  //
+  //  1. Floating-point numbers are sign/magnitude, not two's complement, so we
+  //     have to consider separately the cases of "both negative" and "one of
+  //     each sign".
+  //
+  //  2. -0 and +0 are required to compare equal.
+  //
+  // But problem #1 is not as hard as it sounds! If both operands are negative,
+  // then we can get the result we want by comparing them as unsigned integers
+  // the opposite way round, because the input with the smaller value (as an
+  // integer) is the larger number in an FP ordering sense. And if one operand
+  // is negative and the other is positive, the _same_ reversed comparison
+  // works, because the positive number (with zero sign bit) will always
+  // compare less than the negative one in an unsigned-integers sense.
+  //
+  // So we only have to worry about problem #2, signed zeroes. This only
+  // affects the answer if _both_ operands are zero. And we can check that
+  // easily, because it happens if and only if r2 = 0x80000000. (We know r2
+  // has its sign bit set; if it has no other bits set, that's because both
+  // inputs were either 0x80000000 or 0x00000000.)
+  lsls    r2, r2, #1              // EQ if both inputs are zero (also sets C)
+  beq     1f
+  cmp     op1, op0                // otherwise, compare them backwards
+1:
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(NaNInf_check_negative):
+  // Second tier for negative numbers: we know the OR of the exponents is 0xFF,
+  // but again, we might not have either _actual_ exponent 0xFF, and also, an
+  // exponent 0xFF might be an infinity instead of a NaN.
+  //
+  // On this path we've already branched twice (once for negative numbers and
+  // once for the first-tier NaN check), so we'll just go straight to the
+  // precise check for NaNs.
+  movs    r2, #0xFF
+  lsls    r2, r2, #24
+  lsls    r3, op0, #1
+  cmp     r3, r2
+  bhi     LOCAL_LABEL(NaN)
+  lsls    r3, op1, #1
+  cmp     r3, r2
+  bhi     LOCAL_LABEL(NaN)
+
+  // Now we've ruled out NaNs, so we can just compare the two input registers
+  // and return. On this path we _don't_ need to check for the special case of
+  // comparing two zeroes, because we only came here if the bitwise OR of the
+  // exponent fields was 0xFF, which means the exponents can't both have been
+  // zero! So we can _just_ do the reversed CMP and finish.
+  cmp     op1, op0
+  SetReturnRegister
+  bx      lr
diff --git a/compiler-rt/lib/builtins/arm/thumb1/gesf2.S 
b/compiler-rt/lib/builtins/arm/thumb1/gesf2.S
new file mode 100644
index 0000000000000..aa75ec7b0a67b
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/thumb1/gesf2.S
@@ -0,0 +1,54 @@
+//===-- gesf2.S - single-precision floating point comparison 
--------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This function has the semantics of GNU __cmpsf2, except for its NaN
+// handling. It's a three-way compare which returns <0 if x<y, 0 if x==y, and
+// >0 if x>y. If the result is unordered (i.e. x or y or both is NaN) then it
+// returns <0, where __cmpsf2 would return >0.
+//
+// This also makes it suitable for use as __gtsf2 or __gesf2 (or __eqsf2 or
+// __nesf2).
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../assembly.h"
+
+  .syntax unified
+  .text
+  .p2align 2
+
+op0 .req r0
+op1 .req r1
+.macro SetReturnRegister
+  bhi 0f
+  blo 1f
+  movs r0, #0
+  bx lr
+0:
+  movs r0, #1
+  bx lr
+1:
+  movs r0, #1
+  rsbs r0, r0, #0
+  bx lr
+.endm
+
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __compiler_rt_softfp_gesf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__gtsf2, __gesf2)
+
+DEFINE_COMPILERRT_THUMB_FUNCTION(__compiler_rt_softfp_gesf2)
+  #include "fcmp.h"
+
+LOCAL_LABEL(NaN):
+  movs r0, #1
+  rsbs r0, r0, #0
+  bx lr
+
+END_COMPILERRT_FUNCTION(__compiler_rt_softfp_gesf2)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/lib/builtins/arm/thumb1/unordsf2.S 
b/compiler-rt/lib/builtins/arm/thumb1/unordsf2.S
new file mode 100644
index 0000000000000..5d74e0fdfe159
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/thumb1/unordsf2.S
@@ -0,0 +1,49 @@
+//===-- unordsf2.S - single-precision floating point comparison 
-----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Return 1 if the result of comparing x with y is 'unordered', i.e.
+// one of x and y is NaN.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../assembly.h"
+
+  .syntax unified
+  .text
+  .p2align 2
+
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__unordsf2, __aeabi_fcmpun)
+
+DEFINE_COMPILERRT_THUMB_FUNCTION(__aeabi_fcmpun)
+
+  // This function isn't based on the general-purpose code in fcmp.h, because
+  // it's more effort than needed. Here we just need to identify whether or not
+  // there's at least one NaN in the inputs. There's no need to vary that check
+  // based on the sign bit, so we might as well just do the NaN test as quickly
+  // as possible.
+  movs    r2, #0xFF
+  lsls    r2, r2, #24
+  lsls    r3, r0, #1
+  cmp     r3, r2
+  bhi     LOCAL_LABEL(NaN)
+  lsls    r3, r1, #1
+  cmp     r3, r2
+  bhi     LOCAL_LABEL(NaN)
+
+  // Neither branch was taken, so we have no NaNs and return false.
+  movs    r0, #0
+  bx      lr
+
+  // Otherwise, we have at least one NaN, and return true.
+LOCAL_LABEL(NaN):
+  movs    r0, #1
+  bx      lr
+
+END_COMPILERRT_FUNCTION(__aeabi_fcmpun)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/lib/builtins/arm/unordsf2.S 
b/compiler-rt/lib/builtins/arm/unordsf2.S
new file mode 100644
index 0000000000000..1930996779888
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/unordsf2.S
@@ -0,0 +1,56 @@
+//===-- unordsf2.S - single-precision floating point comparison 
-----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Return 1 if the result of comparing x with y is 'unordered', i.e.
+// one of x and y is NaN.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+
+  .syntax unified
+  .text
+  .p2align 2
+
+#if __ARM_PCS_VFP
+DEFINE_COMPILERRT_FUNCTION(__unordsf2)
+  push {r4, lr}
+  vmov r0, s0
+  vmov r1, s1
+  bl __aeabi_fcmpun
+  pop {r4, pc}
+#else
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__unordsf2, __aeabi_fcmpun)
+#endif
+
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpun)
+
+  // This function isn't based on the general-purpose code in fcmp.h, because
+  // it's more effort than needed. Here we just need to identify whether or not
+  // there's at least one NaN in the inputs. There's no need to vary that check
+  // based on the sign bit, so we might as well just do the NaN test as quickly
+  // as possible.
+  mov     r12, #0xFF << 24
+  cmp     r12, r0, lsl #1    // if LO, then r12 < (r0 << 1), so r0 is a NaN
+  cmphs   r12, r1, lsl #1    // if not LO, then do the same check for r1
+
+  // If HS, then we have no NaNs and return false. We do this as quickly as we
+  // can (not stopping to take two instructions setting up r0 for both
+  // possibilities), on the assumption that NaNs are rare and we want to
+  // optimize for the non-NaN path.
+  movhs   r0, #0
+  bxhs    lr
+
+  // Otherwise, we have at least one NaN, and return true.
+  mov     r0, #1
+  bx      lr
+
+END_COMPILERRT_FUNCTION(__aeabi_fcmpun)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/test/builtins/Unit/comparesf2new_test.c 
b/compiler-rt/test/builtins/Unit/comparesf2new_test.c
new file mode 100644
index 0000000000000..5c8be88354618
--- /dev/null
+++ b/compiler-rt/test/builtins/Unit/comparesf2new_test.c
@@ -0,0 +1,433 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_comparesf2
+
+#include "int_lib.h"
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "fp_test.h"
+
+COMPILER_RT_ABI int __eqsf2(float, float);
+COMPILER_RT_ABI int __nesf2(float, float);
+COMPILER_RT_ABI int __gesf2(float, float);
+COMPILER_RT_ABI int __gtsf2(float, float);
+COMPILER_RT_ABI int __lesf2(float, float);
+COMPILER_RT_ABI int __ltsf2(float, float);
+COMPILER_RT_ABI int __cmpsf2(float, float);
+COMPILER_RT_ABI int __unordsf2(float, float);
+
+enum Result { // expected relation between the two operands of one test case
+  RESULT_LT, // first operand compares less than the second
+  RESULT_GT, // first operand compares greater than the second
+  RESULT_EQ, // operands compare equal
+  RESULT_UN // unordered: at least one operand is a NaN
+};
+
+int expect(int line, uint32_t a_rep, uint32_t b_rep, const char *name, int result, int ok, const char *expected) {
+  if (!ok) // check failed: report the call site, both operand bit patterns, the actual result and the expected relation
+    printf("error at line %d: %s(%08" PRIx32 ", %08" PRIx32 ") = %d, expected %s\n",
+           line, name, a_rep, b_rep, result, expected);
+  return !ok; // 1 on failure, 0 on success, so callers can OR the results together
+}
+
+int test__comparesf2(int line, uint32_t a_rep, uint32_t b_rep, enum Result result) {
+  float a = fromRep32(a_rep), b = fromRep32(b_rep); // fromRep32 comes from fp_test.h; presumably reinterprets the bit pattern as a float
+
+  int eq = __eqsf2(a, b); // call every comparison entry point once on the same operand pair
+  int ne = __nesf2(a, b);
+  int ge = __gesf2(a, b);
+  int gt = __gtsf2(a, b);
+  int le = __lesf2(a, b);
+  int lt = __ltsf2(a, b);
+  int cmp = __cmpsf2(a, b);
+  int unord = __unordsf2(a, b);
+
+  int ret = 0; // becomes 1 as soon as any check below fails
+
+  switch (result) {
+  case RESULT_LT: // a < b
+    ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0");
+    ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0");
+    ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge < 0, "< 0");
+    ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0");
+    ret |= expect(line, a_rep, b_rep, "__lesf2", le, le <= 0, "<= 0");
+    ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt < 0, "< 0");
+    ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == -1, "== -1");
+    ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0");
+    break;
+  case RESULT_GT: // a > b
+    ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0");
+    ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0");
+    ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge >= 0, ">= 0");
+    ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt > 0, "> 0");
+    ret |= expect(line, a_rep, b_rep, "__lesf2", le, le > 0, "> 0");
+    ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0");
+    ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == 1, "== 1");
+    ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0");
+    break;
+  case RESULT_EQ: // a == b
+    ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq == 0, "== 0");
+    ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne == 0, "== 0");
+    ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge >= 0, ">= 0");
+    ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0");
+    ret |= expect(line, a_rep, b_rep, "__lesf2", le, le <= 0, "<= 0");
+    ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0");
+    ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == 0, "== 0");
+    ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0");
+    break;
+  case RESULT_UN: // unordered: each return value must make its ordered comparison (==, >=, >, <=, <) come out false
+    ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0");
+    ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0");
+    ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge < 0, "< 0");
+    ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0");
+    ret |= expect(line, a_rep, b_rep, "__lesf2", le, le > 0, "> 0");
+    ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0");
+    ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == 1, "== 1");
+    ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 1, "== 1");
+    break;
+  }
+
+  return ret; // 0 if all eight checks passed, 1 otherwise
+}
+
+#define test__comparesf2(a,b,x) test__comparesf2(__LINE__,a,b,x) // supplies the call site's line number as the function's first argument
+
+int main(void) { // runs every table entry below; operands are given as raw single-precision bit patterns
+  int status = 0; // OR of the per-case failure flags
+
+  status |= test__comparesf2(0x00000000, 0x00000001, RESULT_LT);
+  status |= test__comparesf2(0x00000000, 0x007fffff, RESULT_LT);
+  status |= test__comparesf2(0x00000000, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0x00000000, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x00000000, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0x00000000, 0x7f872da0, RESULT_UN); // NaN with the top mantissa bit clear
+  status |= test__comparesf2(0x00000000, 0x7fe42e09, RESULT_UN); // NaN with the top mantissa bit set
+  status |= test__comparesf2(0x00000000, 0x80000000, RESULT_EQ); // +0 and -0 must compare equal
+  status |= test__comparesf2(0x00000000, 0x80000001, RESULT_GT);
+  status |= test__comparesf2(0x00000000, 0x807fffff, RESULT_GT);
+  status |= test__comparesf2(0x00000000, 0x80800000, RESULT_GT);
+  status |= test__comparesf2(0x00000000, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0x00000001, RESULT_EQ);
+  status |= test__comparesf2(0x00000001, 0x3f7fffff, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x3ffffffe, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x3fffffff, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x7effffff, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x7f7ffffe, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x7f7fffff, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x7f94d5b9, RESULT_UN);
+  status |= test__comparesf2(0x00000001, 0x7fef53b1, RESULT_UN);
+  status |= test__comparesf2(0x00000001, 0x80000001, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xbf7fffff, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xbffffffe, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xbfffffff, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xfeffffff, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xff7ffffe, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xff7fffff, RESULT_GT);
+  status |= test__comparesf2(0x00000002, 0x00000001, RESULT_GT);
+  status |= test__comparesf2(0x00000003, 0x00000002, RESULT_GT);
+  status |= test__comparesf2(0x00000003, 0x40400000, RESULT_LT);
+  status |= test__comparesf2(0x00000003, 0x40a00000, RESULT_LT);
+  status |= test__comparesf2(0x00000003, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x00000003, 0xc0a00000, RESULT_GT);
+  status |= test__comparesf2(0x00000003, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x00000004, 0x00000004, RESULT_EQ);
+  status |= test__comparesf2(0x007ffffc, 0x807ffffc, RESULT_GT);
+  status |= test__comparesf2(0x007ffffd, 0x007ffffe, RESULT_LT);
+  status |= test__comparesf2(0x007fffff, 0x00000000, RESULT_GT);
+  status |= test__comparesf2(0x007fffff, 0x007ffffe, RESULT_GT);
+  status |= test__comparesf2(0x007fffff, 0x007fffff, RESULT_EQ);
+  status |= test__comparesf2(0x007fffff, 0x00800000, RESULT_LT);
+  status |= test__comparesf2(0x007fffff, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0x007fffff, 0x7fa111d3, RESULT_UN);
+  status |= test__comparesf2(0x007fffff, 0x7ff43134, RESULT_UN);
+  status |= test__comparesf2(0x007fffff, 0x80000000, RESULT_GT);
+  status |= test__comparesf2(0x007fffff, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0x00800000, 0x00000000, RESULT_GT);
+  status |= test__comparesf2(0x00800000, 0x00800000, RESULT_EQ);
+  status |= test__comparesf2(0x00800000, 0x80800000, RESULT_GT);
+  status |= test__comparesf2(0x00800001, 0x00800000, RESULT_GT);
+  status |= test__comparesf2(0x00800001, 0x00800002, RESULT_LT);
+  status |= test__comparesf2(0x00ffffff, 0x01000000, RESULT_LT);
+  status |= test__comparesf2(0x00ffffff, 0x01000002, RESULT_LT);
+  status |= test__comparesf2(0x00ffffff, 0x01000004, RESULT_LT);
+  status |= test__comparesf2(0x01000000, 0x00ffffff, RESULT_GT);
+  status |= test__comparesf2(0x01000001, 0x00800001, RESULT_GT);
+  status |= test__comparesf2(0x01000001, 0x00ffffff, RESULT_GT);
+  status |= test__comparesf2(0x01000002, 0x00800001, RESULT_GT);
+  status |= test__comparesf2(0x017fffff, 0x01800000, RESULT_LT);
+  status |= test__comparesf2(0x01800000, 0x017fffff, RESULT_GT);
+  status |= test__comparesf2(0x01800001, 0x017fffff, RESULT_GT);
+  status |= test__comparesf2(0x01800002, 0x01000003, RESULT_GT);
+  status |= test__comparesf2(0x3f000000, 0x3f000000, RESULT_EQ);
+  status |= test__comparesf2(0x3f7fffff, 0x00000001, RESULT_GT);
+  status |= test__comparesf2(0x3f7fffff, 0x80000001, RESULT_GT);
+  status |= test__comparesf2(0x3f800000, 0x3f800000, RESULT_EQ);
+  status |= test__comparesf2(0x3f800000, 0x3f800003, RESULT_LT);
+  status |= test__comparesf2(0x3f800000, 0x40000000, RESULT_LT);
+  status |= test__comparesf2(0x3f800000, 0x40e00000, RESULT_LT);
+  status |= test__comparesf2(0x3f800000, 0x7fb27f62, RESULT_UN);
+  status |= test__comparesf2(0x3f800000, 0x7fd9d4b4, RESULT_UN);
+  status |= test__comparesf2(0x3f800000, 0x80000000, RESULT_GT);
+  status |= test__comparesf2(0x3f800000, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x3f800000, 0xbf800003, RESULT_GT);
+  status |= test__comparesf2(0x3f800001, 0x3f800000, RESULT_GT);
+  status |= test__comparesf2(0x3f800001, 0x3f800002, RESULT_LT);
+  status |= test__comparesf2(0x3f800001, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x3ffffffc, 0x3ffffffd, RESULT_LT);
+  status |= test__comparesf2(0x3fffffff, 0x00000001, RESULT_GT);
+  status |= test__comparesf2(0x3fffffff, 0x40000000, RESULT_LT);
+  status |= test__comparesf2(0x40000000, 0x3f800000, RESULT_GT);
+  status |= test__comparesf2(0x40000000, 0x3fffffff, RESULT_GT);
+  status |= test__comparesf2(0x40000000, 0x40000000, RESULT_EQ);
+  status |= test__comparesf2(0x40000000, 0x40000001, RESULT_LT);
+  status |= test__comparesf2(0x40000000, 0xc0000000, RESULT_GT);
+  status |= test__comparesf2(0x40000000, 0xc0000001, RESULT_GT);
+  status |= test__comparesf2(0x40000000, 0xc0a00000, RESULT_GT);
+  status |= test__comparesf2(0x40000001, 0x3f800001, RESULT_GT);
+  status |= test__comparesf2(0x40000001, 0x40000002, RESULT_LT);
+  status |= test__comparesf2(0x40000001, 0xc0000002, RESULT_GT);
+  status |= test__comparesf2(0x40000002, 0x3f800001, RESULT_GT);
+  status |= test__comparesf2(0x40000002, 0x3f800003, RESULT_GT);
+  status |= test__comparesf2(0x40000004, 0x40000003, RESULT_GT);
+  status |= test__comparesf2(0x40400000, 0x40400000, RESULT_EQ);
+  status |= test__comparesf2(0x407fffff, 0x407ffffe, RESULT_GT);
+  status |= test__comparesf2(0x407fffff, 0x40800002, RESULT_LT);
+  status |= test__comparesf2(0x40800001, 0x407fffff, RESULT_GT);
+  status |= test__comparesf2(0x40a00000, 0x00000000, RESULT_GT);
+  status |= test__comparesf2(0x40a00000, 0x80000000, RESULT_GT);
+  status |= test__comparesf2(0x40a00000, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x40a00000, 0xc0a00000, RESULT_GT);
+  status |= test__comparesf2(0x7d800001, 0x7d7fffff, RESULT_GT);
+  status |= test__comparesf2(0x7e7fffff, 0x7e7ffffe, RESULT_GT);
+  status |= test__comparesf2(0x7e7fffff, 0x7e800002, RESULT_LT);
+  status |= test__comparesf2(0x7e800000, 0x7e7fffff, RESULT_GT);
+  status |= test__comparesf2(0x7e800000, 0x7e800000, RESULT_EQ);
+  status |= test__comparesf2(0x7e800000, 0x7e800001, RESULT_LT);
+  status |= test__comparesf2(0x7e800001, 0x7e800000, RESULT_GT);
+  status |= test__comparesf2(0x7e800001, 0x7f000001, RESULT_LT);
+  status |= test__comparesf2(0x7e800001, 0xfe800000, RESULT_GT);
+  status |= test__comparesf2(0x7e800002, 0x7e000003, RESULT_GT);
+  status |= test__comparesf2(0x7e800004, 0x7e800003, RESULT_GT);
+  status |= test__comparesf2(0x7efffffe, 0x7efffffe, RESULT_EQ);
+  status |= test__comparesf2(0x7efffffe, 0x7effffff, RESULT_LT);
+  status |= test__comparesf2(0x7efffffe, 0xfeffffff, RESULT_GT);
+  status |= test__comparesf2(0x7effffff, 0x3f800000, RESULT_GT);
+  status |= test__comparesf2(0x7effffff, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x7effffff, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x7effffff, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x7f000000, 0x3f800000, RESULT_GT);
+  status |= test__comparesf2(0x7f000000, 0x7f000000, RESULT_EQ);
+  status |= test__comparesf2(0x7f000000, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0x7f000000, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x7f000000, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x7f000000, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0x7f000001, 0x7f000000, RESULT_GT);
+  status |= test__comparesf2(0x7f000001, 0x7f000002, RESULT_LT);
+  status |= test__comparesf2(0x7f000001, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x7f000002, 0x7e800001, RESULT_GT);
+  status |= test__comparesf2(0x7f7ffffe, 0x3f800000, RESULT_GT);
+  status |= test__comparesf2(0x7f7ffffe, 0x7f7fffff, RESULT_LT);
+  status |= test__comparesf2(0x7f7ffffe, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x7f7ffffe, 0xff7fffff, RESULT_GT);
+  status |= test__comparesf2(0x7f7fffff, 0x00000001, RESULT_GT);
+  status |= test__comparesf2(0x7f7fffff, 0x3f800000, RESULT_GT);
+  status |= test__comparesf2(0x7f7fffff, 0x7f7fffff, RESULT_EQ);
+  status |= test__comparesf2(0x7f7fffff, 0x7fbed1eb, RESULT_UN);
+  status |= test__comparesf2(0x7f7fffff, 0x7fe15ee3, RESULT_UN);
+  status |= test__comparesf2(0x7f7fffff, 0x80000001, RESULT_GT);
+  status |= test__comparesf2(0x7f7fffff, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x00000000, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x00000001, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x007fffff, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x7f000000, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x7f7fffff, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x7f800000, RESULT_EQ);
+  status |= test__comparesf2(0x7f800000, 0x7f91a4da, RESULT_UN);
+  status |= test__comparesf2(0x7f800000, 0x7fd44a09, RESULT_UN);
+  status |= test__comparesf2(0x7f800000, 0x80000000, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x80000001, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x807fffff, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0xff7fffff, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0x7f86d066, 0x00000000, RESULT_UN);
+  status |= test__comparesf2(0x7f85a878, 0x00000001, RESULT_UN);
+  status |= test__comparesf2(0x7f8c0dca, 0x007fffff, RESULT_UN);
+  status |= test__comparesf2(0x7f822725, 0x3f800000, RESULT_UN);
+  status |= test__comparesf2(0x7f853870, 0x7f7fffff, RESULT_UN);
+  status |= test__comparesf2(0x7fbefc9d, 0x7f800000, RESULT_UN);
+  status |= test__comparesf2(0x7f9f84a9, 0x7f81461b, RESULT_UN);
+  status |= test__comparesf2(0x7f9e2c1d, 0x7fe4a313, RESULT_UN);
+  status |= test__comparesf2(0x7fb0e6d0, 0x80000000, RESULT_UN);
+  status |= test__comparesf2(0x7fac9171, 0x80000001, RESULT_UN);
+  status |= test__comparesf2(0x7f824ae6, 0x807fffff, RESULT_UN);
+  status |= test__comparesf2(0x7fa8b9a0, 0xbf800000, RESULT_UN);
+  status |= test__comparesf2(0x7f92a1cd, 0xff7fffff, RESULT_UN);
+  status |= test__comparesf2(0x7fbe5d29, 0xff800000, RESULT_UN);
+  status |= test__comparesf2(0x7fcc9a57, 0x00000000, RESULT_UN);
+  status |= test__comparesf2(0x7fec9d71, 0x00000001, RESULT_UN);
+  status |= test__comparesf2(0x7fd5db76, 0x007fffff, RESULT_UN);
+  status |= test__comparesf2(0x7fd003d9, 0x3f800000, RESULT_UN);
+  status |= test__comparesf2(0x7fca0684, 0x7f7fffff, RESULT_UN);
+  status |= test__comparesf2(0x7fc46aa0, 0x7f800000, RESULT_UN);
+  status |= test__comparesf2(0x7ff72b19, 0x7faee637, RESULT_UN);
+  status |= test__comparesf2(0x7fe9e0c1, 0x7fcc2788, RESULT_UN);
+  status |= test__comparesf2(0x7fc571ea, 0x80000000, RESULT_UN);
+  status |= test__comparesf2(0x7fd81a54, 0x80000001, RESULT_UN);
+  status |= test__comparesf2(0x7febdfaf, 0x807fffff, RESULT_UN);
+  status |= test__comparesf2(0x7ffa1f94, 0xbf800000, RESULT_UN);
+  status |= test__comparesf2(0x7ff38fa0, 0xff7fffff, RESULT_UN);
+  status |= test__comparesf2(0x7fdf3502, 0xff800000, RESULT_UN);
+  status |= test__comparesf2(0x80000000, 0x00000000, RESULT_EQ);
+  status |= test__comparesf2(0x80000000, 0x00000001, RESULT_LT);
+  status |= test__comparesf2(0x80000000, 0x007fffff, RESULT_LT);
+  status |= test__comparesf2(0x80000000, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x80000000, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0x80000000, 0x7fbdfb72, RESULT_UN);
+  status |= test__comparesf2(0x80000000, 0x7fdd528e, RESULT_UN);
+  status |= test__comparesf2(0x80000000, 0x80000001, RESULT_GT);
+  status |= test__comparesf2(0x80000000, 0x807fffff, RESULT_GT);
+  status |= test__comparesf2(0x80000000, 0x80800000, RESULT_GT);
+  status |= test__comparesf2(0x80000000, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x80000000, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0x00000001, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x3f7fffff, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x3ffffffe, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x3fffffff, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x7effffff, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x7f7ffffe, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x7f7fffff, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x7fac481a, RESULT_UN);
+  status |= test__comparesf2(0x80000001, 0x7fcf111d, RESULT_UN);
+  status |= test__comparesf2(0x80000001, 0x80000001, RESULT_EQ);
+  status |= test__comparesf2(0x80000001, 0xbf7fffff, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xbffffffe, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xbfffffff, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xfeffffff, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xff7ffffe, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xff7fffff, RESULT_GT);
+  status |= test__comparesf2(0x80000002, 0x80000001, RESULT_LT);
+  status |= test__comparesf2(0x80000003, 0x40400000, RESULT_LT);
+  status |= test__comparesf2(0x80000003, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x80000003, 0x80000002, RESULT_LT);
+  status |= test__comparesf2(0x80000003, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x80000004, 0x80000004, RESULT_EQ);
+  status |= test__comparesf2(0x807ffffd, 0x807ffffe, RESULT_GT);
+  status |= test__comparesf2(0x807fffff, 0x00000000, RESULT_LT);
+  status |= test__comparesf2(0x807fffff, 0x007fffff, RESULT_LT);
+  status |= test__comparesf2(0x807fffff, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0x807fffff, 0x7faf07f6, RESULT_UN);
+  status |= test__comparesf2(0x807fffff, 0x7fd18a54, RESULT_UN);
+  status |= test__comparesf2(0x807fffff, 0x80000000, RESULT_LT);
+  status |= test__comparesf2(0x807fffff, 0x807ffffe, RESULT_LT);
+  status |= test__comparesf2(0x807fffff, 0x807fffff, RESULT_EQ);
+  status |= test__comparesf2(0x807fffff, 0x80800000, RESULT_GT);
+  status |= test__comparesf2(0x807fffff, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0x80800000, 0x00000000, RESULT_LT);
+  status |= test__comparesf2(0x80800000, 0x00800000, RESULT_LT);
+  status |= test__comparesf2(0x80800001, 0x80800000, RESULT_LT);
+  status |= test__comparesf2(0x80800001, 0x80800002, RESULT_GT);
+  status |= test__comparesf2(0x80ffffff, 0x81000000, RESULT_GT);
+  status |= test__comparesf2(0x80ffffff, 0x81000002, RESULT_GT);
+  status |= test__comparesf2(0x80ffffff, 0x81000004, RESULT_GT);
+  status |= test__comparesf2(0x81000000, 0x80ffffff, RESULT_LT);
+  status |= test__comparesf2(0x81000001, 0x80800001, RESULT_LT);
+  status |= test__comparesf2(0x81000001, 0x80ffffff, RESULT_LT);
+  status |= test__comparesf2(0x81000002, 0x80800001, RESULT_LT);
+  status |= test__comparesf2(0x817fffff, 0x81800000, RESULT_GT);
+  status |= test__comparesf2(0x81800000, 0x817fffff, RESULT_LT);
+  status |= test__comparesf2(0x81800001, 0x817fffff, RESULT_LT);
+  status |= test__comparesf2(0x81800002, 0x81000003, RESULT_LT);
+  status |= test__comparesf2(0xbf800000, 0x3f800003, RESULT_LT);
+  status |= test__comparesf2(0xbf800000, 0x7fa66ee9, RESULT_UN);
+  status |= test__comparesf2(0xbf800000, 0x7fe481ef, RESULT_UN);
+  status |= test__comparesf2(0xbf800000, 0x80000000, RESULT_LT);
+  status |= test__comparesf2(0xbf800000, 0xbf800003, RESULT_GT);
+  status |= test__comparesf2(0xbf800001, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0xbf800001, 0xbf800000, RESULT_LT);
+  status |= test__comparesf2(0xbf800001, 0xbf800002, RESULT_GT);
+  status |= test__comparesf2(0xbffffffc, 0xbffffffd, RESULT_GT);
+  status |= test__comparesf2(0xbfffffff, 0x00000001, RESULT_LT);
+  status |= test__comparesf2(0xbfffffff, 0xc0000000, RESULT_GT);
+  status |= test__comparesf2(0xc0000000, 0x40000001, RESULT_LT);
+  status |= test__comparesf2(0xc0000000, 0xbfffffff, RESULT_LT);
+  status |= test__comparesf2(0xc0000000, 0xc0000001, RESULT_GT);
+  status |= test__comparesf2(0xc0000001, 0x40000002, RESULT_LT);
+  status |= test__comparesf2(0xc0000001, 0xbf800001, RESULT_LT);
+  status |= test__comparesf2(0xc0000001, 0xc0000002, RESULT_GT);
+  status |= test__comparesf2(0xc0000002, 0xbf800001, RESULT_LT);
+  status |= test__comparesf2(0xc0000002, 0xbf800003, RESULT_LT);
+  status |= test__comparesf2(0xc0000004, 0xc0000003, RESULT_LT);
+  status |= test__comparesf2(0xc0400000, 0x40400000, RESULT_LT);
+  status |= test__comparesf2(0xc07fffff, 0xc07ffffe, RESULT_LT);
+  status |= test__comparesf2(0xc07fffff, 0xc0800002, RESULT_GT);
+  status |= test__comparesf2(0xc0800001, 0xc07fffff, RESULT_LT);
+  status |= test__comparesf2(0xfd800001, 0xfd7fffff, RESULT_LT);
+  status |= test__comparesf2(0xfe7fffff, 0xfe7ffffe, RESULT_LT);
+  status |= test__comparesf2(0xfe7fffff, 0xfe800002, RESULT_GT);
+  status |= test__comparesf2(0xfe800000, 0xfe7fffff, RESULT_LT);
+  status |= test__comparesf2(0xfe800000, 0xfe800001, RESULT_GT);
+  status |= test__comparesf2(0xfe800001, 0x7e800000, RESULT_LT);
+  status |= test__comparesf2(0xfe800001, 0xfe800000, RESULT_LT);
+  status |= test__comparesf2(0xfe800001, 0xff000001, RESULT_GT);
+  status |= test__comparesf2(0xfe800002, 0xfe000003, RESULT_LT);
+  status |= test__comparesf2(0xfe800004, 0xfe800003, RESULT_LT);
+  status |= test__comparesf2(0xfefffffe, 0x7efffffe, RESULT_LT);
+  status |= test__comparesf2(0xfefffffe, 0x7effffff, RESULT_LT);
+  status |= test__comparesf2(0xfefffffe, 0xfefffffe, RESULT_EQ);
+  status |= test__comparesf2(0xfefffffe, 0xfeffffff, RESULT_GT);
+  status |= test__comparesf2(0xfeffffff, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0xfeffffff, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0xfeffffff, 0xbf800000, RESULT_LT);
+  status |= test__comparesf2(0xfeffffff, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0xff000000, 0x00000000, RESULT_LT);
+  status |= test__comparesf2(0xff000000, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0xff000000, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0xff000000, 0x80000000, RESULT_LT);
+  status |= test__comparesf2(0xff000000, 0xbf800000, RESULT_LT);
+  status |= test__comparesf2(0xff000000, 0xff000000, RESULT_EQ);
+  status |= test__comparesf2(0xff000000, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0xff000001, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0xff000001, 0xff000000, RESULT_LT);
+  status |= test__comparesf2(0xff000001, 0xff000002, RESULT_GT);
+  status |= test__comparesf2(0xff000002, 0xfe800001, RESULT_LT);
+  status |= test__comparesf2(0xff7ffffe, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0xff7ffffe, 0x7f7fffff, RESULT_LT);
+  status |= test__comparesf2(0xff7ffffe, 0xbf800000, RESULT_LT);
+  status |= test__comparesf2(0xff7ffffe, 0xff7fffff, RESULT_GT);
+  status |= test__comparesf2(0xff7fffff, 0x00000001, RESULT_LT);
+  status |= test__comparesf2(0xff7fffff, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0xff7fffff, 0x7f919cff, RESULT_UN);
+  status |= test__comparesf2(0xff7fffff, 0x7fd729a7, RESULT_UN);
+  status |= test__comparesf2(0xff7fffff, 0x80000001, RESULT_LT);
+  status |= test__comparesf2(0xff7fffff, 0xbf800000, RESULT_LT);
+  status |= test__comparesf2(0xff7fffff, 0xff7fffff, RESULT_EQ);
+  status |= test__comparesf2(0xff800000, 0x00000000, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x00000001, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x007fffff, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x7f7fffff, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x7fafdbc1, RESULT_UN);
+  status |= test__comparesf2(0xff800000, 0x7fec80fe, RESULT_UN);
+  status |= test__comparesf2(0xff800000, 0x80000000, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x80000001, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x807fffff, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0xff000000, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0xff7fffff, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0xff800000, RESULT_EQ);
+
+  return status; // 0 only if every case above passed
+}

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] [compiler-rt][ARM] Optimized single-precision FP comparisons (PR #179925)

Reply via email to