https://github.com/statham-arm created https://github.com/llvm/llvm-project/pull/179925
These comparison functions follow the same structure as the double-precision ones in a prior commit, of a header file containing the main logic and some entry points varying the construction of the return value. In this case, we have provided versions for Thumb1 as well as Arm/Thumb2. >From 017a0e3bac60a714ef6923eead584a91eebbf2a1 Mon Sep 17 00:00:00 2001 From: Simon Tatham <[email protected]> Date: Thu, 29 Jan 2026 16:10:11 +0000 Subject: [PATCH] [compiler-rt][ARM] Optimized single-precision FP comparisons These comparison functions follow the same structure as the double-precision ones in a prior commit, of a header file containing the main logic and some entry points varying the construction of the return value. In this case, we have provided versions for Thumb1 as well as Arm/Thumb2. --- compiler-rt/lib/builtins/CMakeLists.txt | 9 + compiler-rt/lib/builtins/arm/cmpsf2.S | 56 +++ compiler-rt/lib/builtins/arm/fcmp.h | 174 +++++++ compiler-rt/lib/builtins/arm/gesf2.S | 54 +++ compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S | 55 +++ compiler-rt/lib/builtins/arm/thumb1/fcmp.h | 191 ++++++++ compiler-rt/lib/builtins/arm/thumb1/gesf2.S | 54 +++ .../lib/builtins/arm/thumb1/unordsf2.S | 49 ++ compiler-rt/lib/builtins/arm/unordsf2.S | 56 +++ .../test/builtins/Unit/comparesf2new_test.c | 433 ++++++++++++++++++ 10 files changed, 1131 insertions(+) create mode 100644 compiler-rt/lib/builtins/arm/cmpsf2.S create mode 100644 compiler-rt/lib/builtins/arm/fcmp.h create mode 100644 compiler-rt/lib/builtins/arm/gesf2.S create mode 100644 compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S create mode 100644 compiler-rt/lib/builtins/arm/thumb1/fcmp.h create mode 100644 compiler-rt/lib/builtins/arm/thumb1/gesf2.S create mode 100644 compiler-rt/lib/builtins/arm/thumb1/unordsf2.S create mode 100644 compiler-rt/lib/builtins/arm/unordsf2.S create mode 100644 compiler-rt/test/builtins/Unit/comparesf2new_test.c diff --git a/compiler-rt/lib/builtins/CMakeLists.txt 
b/compiler-rt/lib/builtins/CMakeLists.txt index 0e8b0fa553442..0c53781a51392 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -451,8 +451,11 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm") arm/muldf3.S arm/divdf3.S arm/cmpdf2.S + arm/cmpsf2.S arm/gedf2.S + arm/gesf2.S arm/unorddf2.S + arm/unordsf2.S ) set_source_files_properties(${assembly_files} PROPERTIES COMPILE_OPTIONS ${implicit_it_flag}) @@ -507,8 +510,11 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP) set(thumb1_base_SOURCES arm/thumb1/mulsf3.S arm/thumb1/cmpdf2.S + arm/thumb1/cmpsf2.S arm/thumb1/gedf2.S + arm/thumb1/gesf2.S arm/thumb1/unorddf2.S + arm/thumb1/unordsf2.S arm/fnan2.c arm/fnorm2.c arm/funder.c @@ -516,6 +522,9 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP) ) set_property(SOURCE arm/thumb1/cmpdf2.S PROPERTY crt_supersedes comparedf2.c) set_property(SOURCE arm/thumb1/cmpdf2.S DIRECTORY ${COMPILER_RT_SOURCE_DIR} PROPERTY crt_provides comparedf2) + set_property(SOURCE arm/thumb1/cmpsf2.S PROPERTY crt_supersedes comparesf2.S) + # We don't need to set 'crt_provides' for cmpsf2.S, because the + # superseded comparesf2.S will already have enabled the comparesf2 tests. endif() set(arm_EABI_RT_SOURCES diff --git a/compiler-rt/lib/builtins/arm/cmpsf2.S b/compiler-rt/lib/builtins/arm/cmpsf2.S new file mode 100644 index 0000000000000..14166246101af --- /dev/null +++ b/compiler-rt/lib/builtins/arm/cmpsf2.S @@ -0,0 +1,56 @@ +//===-- cmpsf2.S - single-precision floating point comparison -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This function has the semantics of GNU __cmpsf2: it's a three-way compare +// which returns <0 if x<y, 0 if x==y, and >0 if x>y. 
If the result is +// unordered (i.e. x or y or both is NaN) then it returns >0. +// +// This also makes it suitable for use as all of __eqsf2, __nesf2, __ltsf2 or +// __lesf2. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + .p2align 2 + +op0 .req r0 +op1 .req r1 +.macro SetReturnRegister + mov r0, #0 + movhi r0, #1 + movlo r0, #-1 +.endm + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__cmpsf2) + push {r4, lr} + vmov r0, s0 + vmov r1, s1 + bl __compiler_rt_softfp_cmpsf2 + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__cmpsf2, __compiler_rt_softfp_cmpsf2) +#endif +DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __cmpsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __cmpsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__eqsf2, __cmpsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __cmpsf2) + +DEFINE_COMPILERRT_FUNCTION(__compiler_rt_softfp_cmpsf2) + #include "fcmp.h" + +LOCAL_LABEL(NaN): + mov r0, #+1 + bx lr + +END_COMPILERRT_FUNCTION(__compiler_rt_softfp_cmpsf2) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/fcmp.h b/compiler-rt/lib/builtins/arm/fcmp.h new file mode 100644 index 0000000000000..23bdd73a10c5b --- /dev/null +++ b/compiler-rt/lib/builtins/arm/fcmp.h @@ -0,0 +1,174 @@ +//===-- fcmp.h - shared code for single-precision FP comparison functions -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This code is the skeleton of a single-precision FP compare, with two details +// left out: which input value is in which register, and how to make the return +// value. It allows the main comparison logic to be shared between (for +// example) __lesf2 and __gesf2, varying only those details.
+// +//===----------------------------------------------------------------------===// + +// How to use this header file: +// +// This header file is expected to be #included from inside a function +// definition in a .S file. The source file including this header should +// provide the following: +// +// op0 and op1: register aliases (via .req) for the registers containing the +// input operands. +// - For most comparisons, op0 will correspond to r0 and op1 to r1. +// - But a function with the reversed semantics of __aeabi_cfrcmple wil define +// them the other way round. +// +// SetReturnRegister: an assembly macro that looks at the PSR flags and sets up +// an appropriate return value in r0, for the cases that do *not* involve NaN. +// - On entry to this macro, the condition codes LO, EQ and HI indicate that +// op0 < op1, op0 == op1 or op0 > op1 respectively. +// - For functions that return a result in the flags, this macro can be empty, +// because those are the correct flags to return anyway. +// - Functions that return a boolean in r0 should set it up by checking the +// flags. +// +// LOCAL_LABEL(NaN): a label defined within the compare function, after the +// #include of this header. Called when at least one input is a NaN, and sets +// up the appropriate return value for that case. + +// -------------------------------------------------- +// The actual entry point of the compare function. +// +// The basic plan is to start by ORing together the two inputs. This tells us +// two things: +// - the top bit of the output tells us whether both inputs are positive, or +// whether at least one is negative +// - if the 8 exponent bits of the output are not all 1, then there are +// definitely no NaNs, so a fast path can handle most non-NaN cases. + + // First diverge control for the negative-numbers case. + orrs r12, op0, op1 + bmi LOCAL_LABEL(negative) // high bit set => at least one negative input + + // Here, both inputs are positive. 
Try adding 1<<23 to their bitwise OR in + // r12. This will carry all the way into the top bit, setting the N flag, if + // all 8 exponent bits were set. + cmn r12, #1 << 23 + bmi LOCAL_LABEL(NaNInf_check_positive) // need to look harder for NaNs + + // The fastest fast path: both inputs positive and we could easily tell there + // were no NaNs. So we just compare op0 and op1 as unsigned integers. + cmp op0, op1 + SetReturnRegister + bx lr + +LOCAL_LABEL(NaNInf_check_positive): + // Second tier for positive numbers. We come here if both inputs are + // positive, but our fast initial check didn't manage to rule out a NaN. But + // it's not guaranteed that there _is_ a NaN, for two reasons: + // + // 1. An input with exponent 0xFF might be an infinity instead. Those behave + // normally under comparison. + // + // 2. There might not even _be_ an input with exponent 0xFF. All we know so + // far is that the two inputs ORed together had all the exponent bits + // set. So each of those bits is set in _at least one_ of the inputs, but + // not necessarily all in the _same_ input. + // + // Test each exponent individually for 0xFF, using the same CMN idiom as + // above. If neither one carries into the sign bit then we have no NaNs _or_ + // infinities and can compare the registers and return again. + cmn op0, #1 << 23 + cmnpl op1, #1 << 23 + bmi LOCAL_LABEL(NaN_check_positive) + + // Second-tier return path, now we've ruled out anything difficult. + cmp op0, op1 + SetReturnRegister + bx lr + +LOCAL_LABEL(NaN_check_positive): + // Third tier for positive numbers. Here we know that at least one of the + // inputs has exponent 0xFF. But they might still be infinities rather than + // NaNs. So now we must check whether there's an actual NaN, by shifting each + // input left to get rid of the sign bit, and seeing if the result is + // _greater_ than 0xFF000000 (but not equal). 
+ // + // We could have skipped the second-tier check and done this more rigorous + // test immediately. But that would cost an extra instruction in the case + // where there are no infinities or NaNs, and we assume that that is so much + // more common that it's worth optimizing for. + mov r12, #0xFF << 24 + cmp r12, op0, LSL #1 // if LO, then r12 < (op0 << 1), so op0 is a NaN + cmphs r12, op1, LSL #1 // if not LO, then do the same check for op1 + blo LOCAL_LABEL(NaN) // now, if LO, there's definitely a NaN + + // Now we've finally ruled out NaNs! And we still know both inputs are + // positive. So the third-tier return path can just compare the numbers + // again. + cmp op0, op1 + SetReturnRegister + bx lr + +LOCAL_LABEL(negative): + // We come here if at least one operand is negative. We haven't checked for + // NaNs at all yet (the sign check came first), so repeat the first-tier + // check strategy of seeing if all exponent bits are set in r12. + // + // On this path, the sign bit in r12 is set, so if adding 1 to the low + // exponent bit carries all the way through into the sign bit, it will + // _clear_ the sign bit rather than setting it. So we expect MI to be the + // "definitely no NaNs" result, where it was PL on the positive branch. + cmn r12, #1 << 23 + bpl LOCAL_LABEL(NaNInf_check_negative) + + // Now we have no NaNs, but at least one negative number. This gives us two + // complications: + // + // 1. Floating-point numbers are sign/magnitude, not two's complement, so we + // have to consider separately the cases of "both negative" and "one of + // each sign". + // + // 2. -0 and +0 are required to compare equal. + // + // But problem #1 is not as hard as it sounds! If both operands are negative, + // then we can get the result we want by comparing them as unsigned integers + // the opposite way round, because the input with the smaller value (as an + // integer) is the larger number in an FP ordering sense. 
And if one operand + // is negative and the other is positive, the _same_ reversed comparison + // works, because the positive number (with zero sign bit) will always + // compare less than the negative one in an unsigned-integers sense. + // + // So we only have to worry about problem #2, signed zeroes. This only + // affects the answer if _both_ operands are zero. And we can check that + // easily, because it happens if and only if r12 = 0x80000000. (We know r12 + // has its sign bit set; if it has no other bits set, that's because both + // inputs were either 0x80000000 or 0x00000000.) + cmp r12, #0x80000000 // EQ if both inputs are zero + cmpne op1, op0 // otherwise, compare them backwards + SetReturnRegister + bx lr + +LOCAL_LABEL(NaNInf_check_negative): + // Second tier for negative numbers: we know the OR of the exponents is 0xFF, + // but again, we might not have either _actual_ exponent 0xFF, and also, an + // exponent 0xFF might be an infinity instead of a NaN. + // + // On this path we've already branched twice (once for negative numbers and + // once for the first-tier NaN check), so we'll just go straight to the + // precise check for NaNs. + mov r12, #0xFF << 24 + cmp r12, op0, LSL #1 // if LO, then r12 < (op0 << 1), so op0 is a NaN + cmphs r12, op1, LSL #1 // if not LO, then do the same check for op1 + blo LOCAL_LABEL(NaN) + + // Now we've ruled out NaNs, so we can just compare the two input registers + // and return. On this path we _don't_ need to check for the special case of + // comparing two zeroes, because we only came here if the bitwise OR of the + // exponent fields was 0xFF, which means the exponents can't both have been + // zero! So we can _just_ do the reversed CMP and finish. 
+ cmp op1, op0 + SetReturnRegister + bx lr diff --git a/compiler-rt/lib/builtins/arm/gesf2.S b/compiler-rt/lib/builtins/arm/gesf2.S new file mode 100644 index 0000000000000..c149eea589f05 --- /dev/null +++ b/compiler-rt/lib/builtins/arm/gesf2.S @@ -0,0 +1,54 @@ +//===-- gesf2.S - single-precision floating point comparison --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This function has the semantics of GNU __cmpsf2, except for its NaN +// handling. It's a three-way compare which returns <0 if x<y, 0 if x==y, and +// >0 if x>y. If the result is unordered (i.e. x or y or both is NaN) then it +// returns <0, where __cmpsf2 would return >0. +// +// This also makes it suitable for use as __gtsf2 or __gesf2 (or __eqsf2 or +// __nesf2). 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + .p2align 2 + +op0 .req r0 +op1 .req r1 +.macro SetReturnRegister + mov r0, #0 + movhi r0, #1 + movlo r0, #-1 +.endm + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__gesf2) + push {r4, lr} + vmov r0, s0 + vmov r1, s1 + bl __compiler_rt_softfp_gesf2 + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __compiler_rt_softfp_gesf2) +#endif +DEFINE_COMPILERRT_FUNCTION_ALIAS(__gtsf2, __gesf2) + +DEFINE_COMPILERRT_FUNCTION(__compiler_rt_softfp_gesf2) + #include "fcmp.h" + +LOCAL_LABEL(NaN): + mov r0, #-1 + bx lr + +END_COMPILERRT_FUNCTION(__compiler_rt_softfp_gesf2) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S b/compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S new file mode 100644 index 0000000000000..c8611d1147366 --- /dev/null +++ b/compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S @@ -0,0 +1,55 @@ +//===-- cmpsf2.S - single-precision floating point comparison -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This function has the semantics of GNU __cmpsf2: it's a three-way compare +// which returns <0 if x<y, 0 if x==y, and >0 if x>y. If the result is +// unordered (i.e. x or y or both is NaN) then it returns >0. +// +// This also makes it suitable for use as all of __eqsf2, __nesf2, __ltsf2 or +// __lesf2. 
+// +//===----------------------------------------------------------------------===// + +#include "../../assembly.h" + + .syntax unified + .text + .p2align 2 + +op0 .req r0 +op1 .req r1 +.macro SetReturnRegister + bhi 0f + blo 1f + movs r0, #0 + bx lr +0: + movs r0, #1 + bx lr +1: + movs r0, #1 + rsbs r0, r0, #0 + bx lr +.endm + +DEFINE_COMPILERRT_FUNCTION_ALIAS(__cmpsf2, __compiler_rt_softfp_cmpsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __cmpsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __cmpsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__eqsf2, __cmpsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __cmpsf2) + +DEFINE_COMPILERRT_THUMB_FUNCTION(__compiler_rt_softfp_cmpsf2) + #include "fcmp.h" + +LOCAL_LABEL(NaN): + movs r0, #1 + bx lr + +END_COMPILERRT_FUNCTION(__compiler_rt_softfp_cmpsf2) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/thumb1/fcmp.h b/compiler-rt/lib/builtins/arm/thumb1/fcmp.h new file mode 100644 index 0000000000000..bcfe928407e3c --- /dev/null +++ b/compiler-rt/lib/builtins/arm/thumb1/fcmp.h @@ -0,0 +1,191 @@ +//===-- fcmp.h - shared code for single-precision FP comparison functions -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This code is the skeleton of a double-precision FP compare, with two details +// left out: which input value is in which register, and how to make the return +// value. It allows the main comparison logic to be shared between (for +// example) __lesf2 and __gesf2, varying only those details. +// +//===----------------------------------------------------------------------===// + +// How to use this header file: +// +// This header file is expected to be #included from inside a function +// definition in a .S file. 
The source file including this header should +// provide the following: +// +// op0 and op1: register aliases (via .req) for the registers containing the +// input operands. +// - For most comparisons, op0 will correspond to r0 and op1 to r1. +// - But a function with the reversed semantics of __aeabi_cfrcmple will define +// them the other way round. +// +// SetReturnRegister: an assembly macro that looks at the PSR flags and sets up +// an appropriate return value in r0, for the cases that do *not* involve NaN. +// - On entry to this macro, the condition codes LO, EQ and HI indicate that +// op0 < op1, op0 == op1 or op0 > op1 respectively. +// - For functions that return a result in the flags, this macro can be empty, +// because those are the correct flags to return anyway. +// - Functions that return a boolean in r0 should set it up by checking the +// flags. +// +// LOCAL_LABEL(NaN): a label defined within the compare function, after the +// #include of this header. Called when at least one input is a NaN, and sets +// up the appropriate return value for that case. + +// -------------------------------------------------- +// The actual entry point of the compare function. +// +// The basic plan is to start by ORing together the two inputs. This tells us +// two things: +// - the top bit of the output tells us whether both inputs are positive, or +// whether at least one is negative +// - if the 8 exponent bits of the output are not all 1, then there are +// definitely no NaNs, so a fast path can handle most non-NaN cases. + + // Set up the constant 1 << 23 in a register, which we'll need on all + // branches. + movs r3, #1 + lsls r3, r3, #23 + + // Diverge control for the negative-numbers case. + movs r2, op0 + orrs r2, r2, op1 + bmi LOCAL_LABEL(negative) // high bit set => at least one negative input + + // Here, both inputs are positive. Try adding 1<<23 to their bitwise OR in + // r2.
This will carry all the way into the top bit, setting the N flag, if + // all 8 exponent bits were set. + cmn r2, r3 + bmi LOCAL_LABEL(NaNInf_check_positive) // need to look harder for NaNs + + // The fastest fast path: both inputs positive and we could easily tell there + // were no NaNs. So we just compare op0 and op1 as unsigned integers. + cmp op0, op1 + SetReturnRegister + bx lr + +LOCAL_LABEL(NaNInf_check_positive): + // Second tier for positive numbers. We come here if both inputs are + // positive, but our fast initial check didn't manage to rule out a NaN. But + // it's not guaranteed that there _is_ a NaN, for two reasons: + // + // 1. An input with exponent 0xFF might be an infinity instead. Those behave + // normally under comparison. + // + // 2. There might not even _be_ an input with exponent 0xFF. All we know so + // far is that the two inputs ORed together had all the exponent bits + // set. So each of those bits is set in _at least one_ of the inputs, but + // not necessarily all in the _same_ input. + // + // Test each exponent individually for 0xFF, using the same CMN idiom as + // above. If neither one carries into the sign bit then we have no NaNs _or_ + // infinities and can compare the registers and return again. + cmn op0, r3 + bmi LOCAL_LABEL(NaN_check_positive) + cmn op1, r3 + bmi LOCAL_LABEL(NaN_check_positive) + + // Second-tier return path, now we've ruled out anything difficult. + cmp op0, op1 + SetReturnRegister + bx lr + +LOCAL_LABEL(NaN_check_positive): + // Third tier for positive numbers. Here we know that at least one of the + // inputs has exponent 0xFF. But they might still be infinities rather than + // NaNs. So now we must check whether there's an actual NaN, by shifting each + // input left to get rid of the sign bit, and seeing if the result is + // _greater_ than 0xFF000000 (but not equal). + // + // We could have skipped the second-tier check and done this more rigorous + // test immediately. 
But that would cost an extra instruction in the case + // where there are no infinities or NaNs, and we assume that that is so much + // more common that it's worth optimizing for. + movs r2, #0xFF + lsls r2, r2, #24 + lsls r3, op0, #1 + cmp r3, r2 + bhi LOCAL_LABEL(NaN) + lsls r3, op1, #1 + cmp r3, r2 + bhi LOCAL_LABEL(NaN) + + // Now we've finally ruled out NaNs! And we still know both inputs are + // positive. So the third-tier return path can just compare the numbers + // again. + cmp op0, op1 + SetReturnRegister + bx lr + +LOCAL_LABEL(negative): + // We come here if at least one operand is negative. We haven't checked for + // NaNs at all yet (the sign check came first), so repeat the first-tier + // check strategy of seeing if all exponent bits are set in r2. + // + // On this path, the sign bit in r2 is set, so if adding 1 to the low + // exponent bit carries all the way through into the sign bit, it will + // _clear_ the sign bit rather than setting it. So we expect MI to be the + // "definitely no NaNs" result, where it was PL on the positive branch. + cmn r2, r3 + bpl LOCAL_LABEL(NaNInf_check_negative) + + // Now we have no NaNs, but at least one negative number. This gives us two + // complications: + // + // 1. Floating-point numbers are sign/magnitude, not two's complement, so we + // have to consider separately the cases of "both negative" and "one of + // each sign". + // + // 2. -0 and +0 are required to compare equal. + // + // But problem #1 is not as hard as it sounds! If both operands are negative, + // then we can get the result we want by comparing them as unsigned integers + // the opposite way round, because the input with the smaller value (as an + // integer) is the larger number in an FP ordering sense. And if one operand + // is negative and the other is positive, the _same_ reversed comparison + // works, because the positive number (with zero sign bit) will always + // compare less than the negative one in an unsigned-integers sense.
+ // + // So we only have to worry about problem #2, signed zeroes. This only + // affects the answer if _both_ operands are zero. And we can check that + // easily, because it happens if and only if r2 = 0x80000000. (We know r2 + // has its sign bit set; if it has no other bits set, that's because both + // inputs were either 0x80000000 or 0x00000000.) + lsls r2, r2, #1 // EQ if both inputs are zero (also sets C) + beq 1f + cmp op1, op0 // otherwise, compare them backwards +1: + SetReturnRegister + bx lr + +LOCAL_LABEL(NaNInf_check_negative): + // Second tier for negative numbers: we know the OR of the exponents is 0xFF, + // but again, we might not have either _actual_ exponent 0xFF, and also, an + // exponent 0xFF might be an infinity instead of a NaN. + // + // On this path we've already branched twice (once for negative numbers and + // once for the first-tier NaN check), so we'll just go straight to the + // precise check for NaNs. + movs r2, #0xFF + lsls r2, r2, #24 + lsls r3, op0, #1 + cmp r3, r2 + bhi LOCAL_LABEL(NaN) + lsls r3, op1, #1 + cmp r3, r2 + bhi LOCAL_LABEL(NaN) + + // Now we've ruled out NaNs, so we can just compare the two input registers + // and return. On this path we _don't_ need to check for the special case of + // comparing two zeroes, because we only came here if the bitwise OR of the + // exponent fields was 0xFF, which means the exponents can't both have been + // zero! So we can _just_ do the reversed CMP and finish. + cmp op1, op0 + SetReturnRegister + bx lr diff --git a/compiler-rt/lib/builtins/arm/thumb1/gesf2.S b/compiler-rt/lib/builtins/arm/thumb1/gesf2.S new file mode 100644 index 0000000000000..aa75ec7b0a67b --- /dev/null +++ b/compiler-rt/lib/builtins/arm/thumb1/gesf2.S @@ -0,0 +1,54 @@ +//===-- gesf2.S - single-precision floating point comparison --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This function has the semantics of GNU __cmpsf2, except for its NaN +// handling. It's a three-way compare which returns <0 if x<y, 0 if x==y, and +// >0 if x>y. If the result is unordered (i.e. x or y or both is NaN) then it +// returns <0, where __cmpsf2 would return >0. +// +// This also makes it suitable for use as __gtsf2 or __gesf2 (or __eqsf2 or +// __nesf2). +// +//===----------------------------------------------------------------------===// + +#include "../../assembly.h" + + .syntax unified + .text + .p2align 2 + +op0 .req r0 +op1 .req r1 +.macro SetReturnRegister + bhi 0f + blo 1f + movs r0, #0 + bx lr +0: + movs r0, #1 + bx lr +1: + movs r0, #1 + rsbs r0, r0, #0 + bx lr +.endm + +DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __compiler_rt_softfp_gesf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__gtsf2, __gesf2) + +DEFINE_COMPILERRT_THUMB_FUNCTION(__compiler_rt_softfp_gesf2) + #include "fcmp.h" + +LOCAL_LABEL(NaN): + movs r0, #1 + rsbs r0, r0, #0 + bx lr + +END_COMPILERRT_FUNCTION(__compiler_rt_softfp_gesf2) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/thumb1/unordsf2.S b/compiler-rt/lib/builtins/arm/thumb1/unordsf2.S new file mode 100644 index 0000000000000..5d74e0fdfe159 --- /dev/null +++ b/compiler-rt/lib/builtins/arm/thumb1/unordsf2.S @@ -0,0 +1,49 @@ +//===-- unordsf2.S - single-precision floating point comparison -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Return 1 if the result of comparing x with y is 'unordered', i.e. +// one of x and y is NaN. 
+// +//===----------------------------------------------------------------------===// + +#include "../../assembly.h" + + .syntax unified + .text + .p2align 2 + +DEFINE_COMPILERRT_FUNCTION_ALIAS(__unordsf2, __aeabi_fcmpun) + +DEFINE_COMPILERRT_THUMB_FUNCTION(__aeabi_fcmpun) + + // This function isn't based on the general-purpose code in fcmp.h, because + // it's more effort than needed. Here we just need to identify whether or not + // there's at least one NaN in the inputs. There's no need to vary that check + // based on the sign bit, so we might as well just do the NaN test as quickly + // as possible. + movs r2, #0xFF + lsls r2, r2, #24 + lsls r3, r0, #1 + cmp r3, r2 + bhi LOCAL_LABEL(NaN) + lsls r3, r1, #1 + cmp r3, r2 + bhi LOCAL_LABEL(NaN) + + // If HS, then we have no NaNs and return false. + movs r0, #0 + bx lr + + // Otherwise, we have at least one NaN, and return true. +LOCAL_LABEL(NaN): + movs r0, #1 + bx lr + +END_COMPILERRT_FUNCTION(__aeabi_fcmpun) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/unordsf2.S b/compiler-rt/lib/builtins/arm/unordsf2.S new file mode 100644 index 0000000000000..1930996779888 --- /dev/null +++ b/compiler-rt/lib/builtins/arm/unordsf2.S @@ -0,0 +1,56 @@ +//===-- unordsf2.S - single-precision floating point comparison -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Return 1 if the result of comparing x with y is 'unordered', i.e. +// one of x and y is NaN. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__unordsf2) + push {r4, lr} + vmov r0, s0 + vmov r1, s1 + bl __aeabi_fcmpun + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__unordsf2, __aeabi_fcmpun) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpun) + + // This function isn't based on the general-purpose code in fcmp.h, because + // it's more effort than needed. Here we just need to identify whether or not + // there's at least one NaN in the inputs. There's no need to vary that check + // based on the sign bit, so we might as well just do the NaN test as quickly + // as possible. + mov r12, #0xFF << 24 + cmp r12, r0, lsl #1 // if LO, then r12 < (r0 << 1), so r0 is a NaN + cmphs r12, r1, lsl #1 // if not LO, then do the same check for r1 + + // If HS, then we have no NaNs and return false. We do this as quickly as we + // can (not stopping to take two instructions setting up r0 for both + // possibilities), on the assumption that NaNs are rare and we want to + // optimize for the non-NaN path. + movhs r0, #0 + bxhs lr + + // Otherwise, we have at least one NaN, and return true. + mov r0, #1 + bx lr + +END_COMPILERRT_FUNCTION(__aeabi_fcmpun) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/test/builtins/Unit/comparesf2new_test.c b/compiler-rt/test/builtins/Unit/comparesf2new_test.c new file mode 100644 index 0000000000000..5c8be88354618 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/comparesf2new_test.c @@ -0,0 +1,433 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_comparesf2 + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +COMPILER_RT_ABI int __eqsf2(float, float); +COMPILER_RT_ABI int __nesf2(float, float); +COMPILER_RT_ABI int __gesf2(float, float); +COMPILER_RT_ABI int __gtsf2(float, float); +COMPILER_RT_ABI int __lesf2(float, float); +COMPILER_RT_ABI int __ltsf2(float, float); +COMPILER_RT_ABI int __cmpsf2(float, float); +COMPILER_RT_ABI int __unordsf2(float, float); + +enum Result { + RESULT_LT, + RESULT_GT, + RESULT_EQ, + RESULT_UN +}; + +int expect(int line, uint32_t a_rep, uint32_t b_rep, const char *name, int result, int ok, const char *expected) { + if (!ok) + printf("error at line %d: %s(%08" PRIx32 ", %08" PRIx32 ") = %d, expected %s\n", + line, name, a_rep, b_rep, result, expected); + return !ok; +} + +int test__comparesf2(int line, uint32_t a_rep, uint32_t b_rep, enum Result result) { + float a = fromRep32(a_rep), b = fromRep32(b_rep); + + int eq = __eqsf2(a, b); + int ne = __nesf2(a, b); + int ge = __gesf2(a, b); + int gt = __gtsf2(a, b); + int le = __lesf2(a, b); + int lt = __ltsf2(a, b); + int cmp = __cmpsf2(a, b); + int unord = __unordsf2(a, b); + + int ret = 0; + + switch (result) { + case RESULT_LT: + ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0"); + ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0"); + ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge < 0, "< 0"); + ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0"); + ret |= expect(line, a_rep, b_rep, "__lesf2", le, le <= 0, "<= 0"); + ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt < 0, "< 0"); + ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == -1, "== -1"); + ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0"); + break; + case RESULT_GT: + ret |= expect(line, a_rep, b_rep, "__eqsf2", 
eq, eq != 0, "!= 0"); + ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0"); + ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge >= 0, ">= 0"); + ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt > 0, "> 0"); + ret |= expect(line, a_rep, b_rep, "__lesf2", le, le > 0, "> 0"); + ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0"); + ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == 1, "== 1"); + ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0"); + break; + case RESULT_EQ: + ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq == 0, "== 0"); + ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne == 0, "== 0"); + ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge >= 0, ">= 0"); + ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0"); + ret |= expect(line, a_rep, b_rep, "__lesf2", le, le <= 0, "<= 0"); + ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0"); + ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == 0, "== 0"); + ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0"); + break; + case RESULT_UN: + ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0"); + ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0"); + ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge < 0, "< 0"); + ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0"); + ret |= expect(line, a_rep, b_rep, "__lesf2", le, le > 0, "> 0"); + ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0"); + ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == 1, "== 1"); + ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 1, "== 1"); + break; + } + + return ret; +} + +#define test__comparesf2(a,b,x) test__comparesf2(__LINE__,a,b,x) + +int main(void) { + int status = 0; + + status |= test__comparesf2(0x00000000, 0x00000001, RESULT_LT); + status |= test__comparesf2(0x00000000, 0x007fffff, RESULT_LT); + status |= 
test__comparesf2(0x00000000, 0x3f800000, RESULT_LT); + status |= test__comparesf2(0x00000000, 0x7f000000, RESULT_LT); + status |= test__comparesf2(0x00000000, 0x7f800000, RESULT_LT); + status |= test__comparesf2(0x00000000, 0x7f872da0, RESULT_UN); + status |= test__comparesf2(0x00000000, 0x7fe42e09, RESULT_UN); + status |= test__comparesf2(0x00000000, 0x80000000, RESULT_EQ); + status |= test__comparesf2(0x00000000, 0x80000001, RESULT_GT); + status |= test__comparesf2(0x00000000, 0x807fffff, RESULT_GT); + status |= test__comparesf2(0x00000000, 0x80800000, RESULT_GT); + status |= test__comparesf2(0x00000000, 0xff800000, RESULT_GT); + status |= test__comparesf2(0x00000001, 0x00000001, RESULT_EQ); + status |= test__comparesf2(0x00000001, 0x3f7fffff, RESULT_LT); + status |= test__comparesf2(0x00000001, 0x3f800000, RESULT_LT); + status |= test__comparesf2(0x00000001, 0x3ffffffe, RESULT_LT); + status |= test__comparesf2(0x00000001, 0x3fffffff, RESULT_LT); + status |= test__comparesf2(0x00000001, 0x7effffff, RESULT_LT); + status |= test__comparesf2(0x00000001, 0x7f000000, RESULT_LT); + status |= test__comparesf2(0x00000001, 0x7f7ffffe, RESULT_LT); + status |= test__comparesf2(0x00000001, 0x7f7fffff, RESULT_LT); + status |= test__comparesf2(0x00000001, 0x7f94d5b9, RESULT_UN); + status |= test__comparesf2(0x00000001, 0x7fef53b1, RESULT_UN); + status |= test__comparesf2(0x00000001, 0x80000001, RESULT_GT); + status |= test__comparesf2(0x00000001, 0xbf7fffff, RESULT_GT); + status |= test__comparesf2(0x00000001, 0xbf800000, RESULT_GT); + status |= test__comparesf2(0x00000001, 0xbffffffe, RESULT_GT); + status |= test__comparesf2(0x00000001, 0xbfffffff, RESULT_GT); + status |= test__comparesf2(0x00000001, 0xfeffffff, RESULT_GT); + status |= test__comparesf2(0x00000001, 0xff000000, RESULT_GT); + status |= test__comparesf2(0x00000001, 0xff7ffffe, RESULT_GT); + status |= test__comparesf2(0x00000001, 0xff7fffff, RESULT_GT); + status |= test__comparesf2(0x00000002, 0x00000001, 
RESULT_GT); + status |= test__comparesf2(0x00000003, 0x00000002, RESULT_GT); + status |= test__comparesf2(0x00000003, 0x40400000, RESULT_LT); + status |= test__comparesf2(0x00000003, 0x40a00000, RESULT_LT); + status |= test__comparesf2(0x00000003, 0x7f000000, RESULT_LT); + status |= test__comparesf2(0x00000003, 0xc0a00000, RESULT_GT); + status |= test__comparesf2(0x00000003, 0xff000000, RESULT_GT); + status |= test__comparesf2(0x00000004, 0x00000004, RESULT_EQ); + status |= test__comparesf2(0x007ffffc, 0x807ffffc, RESULT_GT); + status |= test__comparesf2(0x007ffffd, 0x007ffffe, RESULT_LT); + status |= test__comparesf2(0x007fffff, 0x00000000, RESULT_GT); + status |= test__comparesf2(0x007fffff, 0x007ffffe, RESULT_GT); + status |= test__comparesf2(0x007fffff, 0x007fffff, RESULT_EQ); + status |= test__comparesf2(0x007fffff, 0x00800000, RESULT_LT); + status |= test__comparesf2(0x007fffff, 0x7f800000, RESULT_LT); + status |= test__comparesf2(0x007fffff, 0x7fa111d3, RESULT_UN); + status |= test__comparesf2(0x007fffff, 0x7ff43134, RESULT_UN); + status |= test__comparesf2(0x007fffff, 0x80000000, RESULT_GT); + status |= test__comparesf2(0x007fffff, 0xff800000, RESULT_GT); + status |= test__comparesf2(0x00800000, 0x00000000, RESULT_GT); + status |= test__comparesf2(0x00800000, 0x00800000, RESULT_EQ); + status |= test__comparesf2(0x00800000, 0x80800000, RESULT_GT); + status |= test__comparesf2(0x00800001, 0x00800000, RESULT_GT); + status |= test__comparesf2(0x00800001, 0x00800002, RESULT_LT); + status |= test__comparesf2(0x00ffffff, 0x01000000, RESULT_LT); + status |= test__comparesf2(0x00ffffff, 0x01000002, RESULT_LT); + status |= test__comparesf2(0x00ffffff, 0x01000004, RESULT_LT); + status |= test__comparesf2(0x01000000, 0x00ffffff, RESULT_GT); + status |= test__comparesf2(0x01000001, 0x00800001, RESULT_GT); + status |= test__comparesf2(0x01000001, 0x00ffffff, RESULT_GT); + status |= test__comparesf2(0x01000002, 0x00800001, RESULT_GT); + status |= 
test__comparesf2(0x017fffff, 0x01800000, RESULT_LT); + status |= test__comparesf2(0x01800000, 0x017fffff, RESULT_GT); + status |= test__comparesf2(0x01800001, 0x017fffff, RESULT_GT); + status |= test__comparesf2(0x01800002, 0x01000003, RESULT_GT); + status |= test__comparesf2(0x3f000000, 0x3f000000, RESULT_EQ); + status |= test__comparesf2(0x3f7fffff, 0x00000001, RESULT_GT); + status |= test__comparesf2(0x3f7fffff, 0x80000001, RESULT_GT); + status |= test__comparesf2(0x3f800000, 0x3f800000, RESULT_EQ); + status |= test__comparesf2(0x3f800000, 0x3f800003, RESULT_LT); + status |= test__comparesf2(0x3f800000, 0x40000000, RESULT_LT); + status |= test__comparesf2(0x3f800000, 0x40e00000, RESULT_LT); + status |= test__comparesf2(0x3f800000, 0x7fb27f62, RESULT_UN); + status |= test__comparesf2(0x3f800000, 0x7fd9d4b4, RESULT_UN); + status |= test__comparesf2(0x3f800000, 0x80000000, RESULT_GT); + status |= test__comparesf2(0x3f800000, 0xbf800000, RESULT_GT); + status |= test__comparesf2(0x3f800000, 0xbf800003, RESULT_GT); + status |= test__comparesf2(0x3f800001, 0x3f800000, RESULT_GT); + status |= test__comparesf2(0x3f800001, 0x3f800002, RESULT_LT); + status |= test__comparesf2(0x3f800001, 0xbf800000, RESULT_GT); + status |= test__comparesf2(0x3ffffffc, 0x3ffffffd, RESULT_LT); + status |= test__comparesf2(0x3fffffff, 0x00000001, RESULT_GT); + status |= test__comparesf2(0x3fffffff, 0x40000000, RESULT_LT); + status |= test__comparesf2(0x40000000, 0x3f800000, RESULT_GT); + status |= test__comparesf2(0x40000000, 0x3fffffff, RESULT_GT); + status |= test__comparesf2(0x40000000, 0x40000000, RESULT_EQ); + status |= test__comparesf2(0x40000000, 0x40000001, RESULT_LT); + status |= test__comparesf2(0x40000000, 0xc0000000, RESULT_GT); + status |= test__comparesf2(0x40000000, 0xc0000001, RESULT_GT); + status |= test__comparesf2(0x40000000, 0xc0a00000, RESULT_GT); + status |= test__comparesf2(0x40000001, 0x3f800001, RESULT_GT); + status |= test__comparesf2(0x40000001, 0x40000002, 
RESULT_LT); + status |= test__comparesf2(0x40000001, 0xc0000002, RESULT_GT); + status |= test__comparesf2(0x40000002, 0x3f800001, RESULT_GT); + status |= test__comparesf2(0x40000002, 0x3f800003, RESULT_GT); + status |= test__comparesf2(0x40000004, 0x40000003, RESULT_GT); + status |= test__comparesf2(0x40400000, 0x40400000, RESULT_EQ); + status |= test__comparesf2(0x407fffff, 0x407ffffe, RESULT_GT); + status |= test__comparesf2(0x407fffff, 0x40800002, RESULT_LT); + status |= test__comparesf2(0x40800001, 0x407fffff, RESULT_GT); + status |= test__comparesf2(0x40a00000, 0x00000000, RESULT_GT); + status |= test__comparesf2(0x40a00000, 0x80000000, RESULT_GT); + status |= test__comparesf2(0x40a00000, 0xbf800000, RESULT_GT); + status |= test__comparesf2(0x40a00000, 0xc0a00000, RESULT_GT); + status |= test__comparesf2(0x7d800001, 0x7d7fffff, RESULT_GT); + status |= test__comparesf2(0x7e7fffff, 0x7e7ffffe, RESULT_GT); + status |= test__comparesf2(0x7e7fffff, 0x7e800002, RESULT_LT); + status |= test__comparesf2(0x7e800000, 0x7e7fffff, RESULT_GT); + status |= test__comparesf2(0x7e800000, 0x7e800000, RESULT_EQ); + status |= test__comparesf2(0x7e800000, 0x7e800001, RESULT_LT); + status |= test__comparesf2(0x7e800001, 0x7e800000, RESULT_GT); + status |= test__comparesf2(0x7e800001, 0x7f000001, RESULT_LT); + status |= test__comparesf2(0x7e800001, 0xfe800000, RESULT_GT); + status |= test__comparesf2(0x7e800002, 0x7e000003, RESULT_GT); + status |= test__comparesf2(0x7e800004, 0x7e800003, RESULT_GT); + status |= test__comparesf2(0x7efffffe, 0x7efffffe, RESULT_EQ); + status |= test__comparesf2(0x7efffffe, 0x7effffff, RESULT_LT); + status |= test__comparesf2(0x7efffffe, 0xfeffffff, RESULT_GT); + status |= test__comparesf2(0x7effffff, 0x3f800000, RESULT_GT); + status |= test__comparesf2(0x7effffff, 0x7f000000, RESULT_LT); + status |= test__comparesf2(0x7effffff, 0xbf800000, RESULT_GT); + status |= test__comparesf2(0x7effffff, 0xff000000, RESULT_GT); + status |= 
test__comparesf2(0x7f000000, 0x3f800000, RESULT_GT); + status |= test__comparesf2(0x7f000000, 0x7f000000, RESULT_EQ); + status |= test__comparesf2(0x7f000000, 0x7f800000, RESULT_LT); + status |= test__comparesf2(0x7f000000, 0xbf800000, RESULT_GT); + status |= test__comparesf2(0x7f000000, 0xff000000, RESULT_GT); + status |= test__comparesf2(0x7f000000, 0xff800000, RESULT_GT); + status |= test__comparesf2(0x7f000001, 0x7f000000, RESULT_GT); + status |= test__comparesf2(0x7f000001, 0x7f000002, RESULT_LT); + status |= test__comparesf2(0x7f000001, 0xff000000, RESULT_GT); + status |= test__comparesf2(0x7f000002, 0x7e800001, RESULT_GT); + status |= test__comparesf2(0x7f7ffffe, 0x3f800000, RESULT_GT); + status |= test__comparesf2(0x7f7ffffe, 0x7f7fffff, RESULT_LT); + status |= test__comparesf2(0x7f7ffffe, 0xbf800000, RESULT_GT); + status |= test__comparesf2(0x7f7ffffe, 0xff7fffff, RESULT_GT); + status |= test__comparesf2(0x7f7fffff, 0x00000001, RESULT_GT); + status |= test__comparesf2(0x7f7fffff, 0x3f800000, RESULT_GT); + status |= test__comparesf2(0x7f7fffff, 0x7f7fffff, RESULT_EQ); + status |= test__comparesf2(0x7f7fffff, 0x7fbed1eb, RESULT_UN); + status |= test__comparesf2(0x7f7fffff, 0x7fe15ee3, RESULT_UN); + status |= test__comparesf2(0x7f7fffff, 0x80000001, RESULT_GT); + status |= test__comparesf2(0x7f7fffff, 0xbf800000, RESULT_GT); + status |= test__comparesf2(0x7f800000, 0x00000000, RESULT_GT); + status |= test__comparesf2(0x7f800000, 0x00000001, RESULT_GT); + status |= test__comparesf2(0x7f800000, 0x007fffff, RESULT_GT); + status |= test__comparesf2(0x7f800000, 0x7f000000, RESULT_GT); + status |= test__comparesf2(0x7f800000, 0x7f7fffff, RESULT_GT); + status |= test__comparesf2(0x7f800000, 0x7f800000, RESULT_EQ); + status |= test__comparesf2(0x7f800000, 0x7f91a4da, RESULT_UN); + status |= test__comparesf2(0x7f800000, 0x7fd44a09, RESULT_UN); + status |= test__comparesf2(0x7f800000, 0x80000000, RESULT_GT); + status |= test__comparesf2(0x7f800000, 0x80000001, 
RESULT_GT); + status |= test__comparesf2(0x7f800000, 0x807fffff, RESULT_GT); + status |= test__comparesf2(0x7f800000, 0xff000000, RESULT_GT); + status |= test__comparesf2(0x7f800000, 0xff7fffff, RESULT_GT); + status |= test__comparesf2(0x7f800000, 0xff800000, RESULT_GT); + status |= test__comparesf2(0x7f86d066, 0x00000000, RESULT_UN); + status |= test__comparesf2(0x7f85a878, 0x00000001, RESULT_UN); + status |= test__comparesf2(0x7f8c0dca, 0x007fffff, RESULT_UN); + status |= test__comparesf2(0x7f822725, 0x3f800000, RESULT_UN); + status |= test__comparesf2(0x7f853870, 0x7f7fffff, RESULT_UN); + status |= test__comparesf2(0x7fbefc9d, 0x7f800000, RESULT_UN); + status |= test__comparesf2(0x7f9f84a9, 0x7f81461b, RESULT_UN); + status |= test__comparesf2(0x7f9e2c1d, 0x7fe4a313, RESULT_UN); + status |= test__comparesf2(0x7fb0e6d0, 0x80000000, RESULT_UN); + status |= test__comparesf2(0x7fac9171, 0x80000001, RESULT_UN); + status |= test__comparesf2(0x7f824ae6, 0x807fffff, RESULT_UN); + status |= test__comparesf2(0x7fa8b9a0, 0xbf800000, RESULT_UN); + status |= test__comparesf2(0x7f92a1cd, 0xff7fffff, RESULT_UN); + status |= test__comparesf2(0x7fbe5d29, 0xff800000, RESULT_UN); + status |= test__comparesf2(0x7fcc9a57, 0x00000000, RESULT_UN); + status |= test__comparesf2(0x7fec9d71, 0x00000001, RESULT_UN); + status |= test__comparesf2(0x7fd5db76, 0x007fffff, RESULT_UN); + status |= test__comparesf2(0x7fd003d9, 0x3f800000, RESULT_UN); + status |= test__comparesf2(0x7fca0684, 0x7f7fffff, RESULT_UN); + status |= test__comparesf2(0x7fc46aa0, 0x7f800000, RESULT_UN); + status |= test__comparesf2(0x7ff72b19, 0x7faee637, RESULT_UN); + status |= test__comparesf2(0x7fe9e0c1, 0x7fcc2788, RESULT_UN); + status |= test__comparesf2(0x7fc571ea, 0x80000000, RESULT_UN); + status |= test__comparesf2(0x7fd81a54, 0x80000001, RESULT_UN); + status |= test__comparesf2(0x7febdfaf, 0x807fffff, RESULT_UN); + status |= test__comparesf2(0x7ffa1f94, 0xbf800000, RESULT_UN); + status |= 
test__comparesf2(0x7ff38fa0, 0xff7fffff, RESULT_UN); + status |= test__comparesf2(0x7fdf3502, 0xff800000, RESULT_UN); + status |= test__comparesf2(0x80000000, 0x00000000, RESULT_EQ); + status |= test__comparesf2(0x80000000, 0x00000001, RESULT_LT); + status |= test__comparesf2(0x80000000, 0x007fffff, RESULT_LT); + status |= test__comparesf2(0x80000000, 0x7f000000, RESULT_LT); + status |= test__comparesf2(0x80000000, 0x7f800000, RESULT_LT); + status |= test__comparesf2(0x80000000, 0x7fbdfb72, RESULT_UN); + status |= test__comparesf2(0x80000000, 0x7fdd528e, RESULT_UN); + status |= test__comparesf2(0x80000000, 0x80000001, RESULT_GT); + status |= test__comparesf2(0x80000000, 0x807fffff, RESULT_GT); + status |= test__comparesf2(0x80000000, 0x80800000, RESULT_GT); + status |= test__comparesf2(0x80000000, 0xbf800000, RESULT_GT); + status |= test__comparesf2(0x80000000, 0xff800000, RESULT_GT); + status |= test__comparesf2(0x80000001, 0x00000001, RESULT_LT); + status |= test__comparesf2(0x80000001, 0x3f7fffff, RESULT_LT); + status |= test__comparesf2(0x80000001, 0x3f800000, RESULT_LT); + status |= test__comparesf2(0x80000001, 0x3ffffffe, RESULT_LT); + status |= test__comparesf2(0x80000001, 0x3fffffff, RESULT_LT); + status |= test__comparesf2(0x80000001, 0x7effffff, RESULT_LT); + status |= test__comparesf2(0x80000001, 0x7f000000, RESULT_LT); + status |= test__comparesf2(0x80000001, 0x7f7ffffe, RESULT_LT); + status |= test__comparesf2(0x80000001, 0x7f7fffff, RESULT_LT); + status |= test__comparesf2(0x80000001, 0x7fac481a, RESULT_UN); + status |= test__comparesf2(0x80000001, 0x7fcf111d, RESULT_UN); + status |= test__comparesf2(0x80000001, 0x80000001, RESULT_EQ); + status |= test__comparesf2(0x80000001, 0xbf7fffff, RESULT_GT); + status |= test__comparesf2(0x80000001, 0xbf800000, RESULT_GT); + status |= test__comparesf2(0x80000001, 0xbffffffe, RESULT_GT); + status |= test__comparesf2(0x80000001, 0xbfffffff, RESULT_GT); + status |= test__comparesf2(0x80000001, 0xfeffffff, 
RESULT_GT); + status |= test__comparesf2(0x80000001, 0xff000000, RESULT_GT); + status |= test__comparesf2(0x80000001, 0xff7ffffe, RESULT_GT); + status |= test__comparesf2(0x80000001, 0xff7fffff, RESULT_GT); + status |= test__comparesf2(0x80000002, 0x80000001, RESULT_LT); + status |= test__comparesf2(0x80000003, 0x40400000, RESULT_LT); + status |= test__comparesf2(0x80000003, 0x7f000000, RESULT_LT); + status |= test__comparesf2(0x80000003, 0x80000002, RESULT_LT); + status |= test__comparesf2(0x80000003, 0xff000000, RESULT_GT); + status |= test__comparesf2(0x80000004, 0x80000004, RESULT_EQ); + status |= test__comparesf2(0x807ffffd, 0x807ffffe, RESULT_GT); + status |= test__comparesf2(0x807fffff, 0x00000000, RESULT_LT); + status |= test__comparesf2(0x807fffff, 0x007fffff, RESULT_LT); + status |= test__comparesf2(0x807fffff, 0x7f800000, RESULT_LT); + status |= test__comparesf2(0x807fffff, 0x7faf07f6, RESULT_UN); + status |= test__comparesf2(0x807fffff, 0x7fd18a54, RESULT_UN); + status |= test__comparesf2(0x807fffff, 0x80000000, RESULT_LT); + status |= test__comparesf2(0x807fffff, 0x807ffffe, RESULT_LT); + status |= test__comparesf2(0x807fffff, 0x807fffff, RESULT_EQ); + status |= test__comparesf2(0x807fffff, 0x80800000, RESULT_GT); + status |= test__comparesf2(0x807fffff, 0xff800000, RESULT_GT); + status |= test__comparesf2(0x80800000, 0x00000000, RESULT_LT); + status |= test__comparesf2(0x80800000, 0x00800000, RESULT_LT); + status |= test__comparesf2(0x80800001, 0x80800000, RESULT_LT); + status |= test__comparesf2(0x80800001, 0x80800002, RESULT_GT); + status |= test__comparesf2(0x80ffffff, 0x81000000, RESULT_GT); + status |= test__comparesf2(0x80ffffff, 0x81000002, RESULT_GT); + status |= test__comparesf2(0x80ffffff, 0x81000004, RESULT_GT); + status |= test__comparesf2(0x81000000, 0x80ffffff, RESULT_LT); + status |= test__comparesf2(0x81000001, 0x80800001, RESULT_LT); + status |= test__comparesf2(0x81000001, 0x80ffffff, RESULT_LT); + status |= 
test__comparesf2(0x81000002, 0x80800001, RESULT_LT); + status |= test__comparesf2(0x817fffff, 0x81800000, RESULT_GT); + status |= test__comparesf2(0x81800000, 0x817fffff, RESULT_LT); + status |= test__comparesf2(0x81800001, 0x817fffff, RESULT_LT); + status |= test__comparesf2(0x81800002, 0x81000003, RESULT_LT); + status |= test__comparesf2(0xbf800000, 0x3f800003, RESULT_LT); + status |= test__comparesf2(0xbf800000, 0x7fa66ee9, RESULT_UN); + status |= test__comparesf2(0xbf800000, 0x7fe481ef, RESULT_UN); + status |= test__comparesf2(0xbf800000, 0x80000000, RESULT_LT); + status |= test__comparesf2(0xbf800000, 0xbf800003, RESULT_GT); + status |= test__comparesf2(0xbf800001, 0x3f800000, RESULT_LT); + status |= test__comparesf2(0xbf800001, 0xbf800000, RESULT_LT); + status |= test__comparesf2(0xbf800001, 0xbf800002, RESULT_GT); + status |= test__comparesf2(0xbffffffc, 0xbffffffd, RESULT_GT); + status |= test__comparesf2(0xbfffffff, 0x00000001, RESULT_LT); + status |= test__comparesf2(0xbfffffff, 0xc0000000, RESULT_GT); + status |= test__comparesf2(0xc0000000, 0x40000001, RESULT_LT); + status |= test__comparesf2(0xc0000000, 0xbfffffff, RESULT_LT); + status |= test__comparesf2(0xc0000000, 0xc0000001, RESULT_GT); + status |= test__comparesf2(0xc0000001, 0x40000002, RESULT_LT); + status |= test__comparesf2(0xc0000001, 0xbf800001, RESULT_LT); + status |= test__comparesf2(0xc0000001, 0xc0000002, RESULT_GT); + status |= test__comparesf2(0xc0000002, 0xbf800001, RESULT_LT); + status |= test__comparesf2(0xc0000002, 0xbf800003, RESULT_LT); + status |= test__comparesf2(0xc0000004, 0xc0000003, RESULT_LT); + status |= test__comparesf2(0xc0400000, 0x40400000, RESULT_LT); + status |= test__comparesf2(0xc07fffff, 0xc07ffffe, RESULT_LT); + status |= test__comparesf2(0xc07fffff, 0xc0800002, RESULT_GT); + status |= test__comparesf2(0xc0800001, 0xc07fffff, RESULT_LT); + status |= test__comparesf2(0xfd800001, 0xfd7fffff, RESULT_LT); + status |= test__comparesf2(0xfe7fffff, 0xfe7ffffe, 
RESULT_LT); + status |= test__comparesf2(0xfe7fffff, 0xfe800002, RESULT_GT); + status |= test__comparesf2(0xfe800000, 0xfe7fffff, RESULT_LT); + status |= test__comparesf2(0xfe800000, 0xfe800001, RESULT_GT); + status |= test__comparesf2(0xfe800001, 0x7e800000, RESULT_LT); + status |= test__comparesf2(0xfe800001, 0xfe800000, RESULT_LT); + status |= test__comparesf2(0xfe800001, 0xff000001, RESULT_GT); + status |= test__comparesf2(0xfe800002, 0xfe000003, RESULT_LT); + status |= test__comparesf2(0xfe800004, 0xfe800003, RESULT_LT); + status |= test__comparesf2(0xfefffffe, 0x7efffffe, RESULT_LT); + status |= test__comparesf2(0xfefffffe, 0x7effffff, RESULT_LT); + status |= test__comparesf2(0xfefffffe, 0xfefffffe, RESULT_EQ); + status |= test__comparesf2(0xfefffffe, 0xfeffffff, RESULT_GT); + status |= test__comparesf2(0xfeffffff, 0x3f800000, RESULT_LT); + status |= test__comparesf2(0xfeffffff, 0x7f000000, RESULT_LT); + status |= test__comparesf2(0xfeffffff, 0xbf800000, RESULT_LT); + status |= test__comparesf2(0xfeffffff, 0xff000000, RESULT_GT); + status |= test__comparesf2(0xff000000, 0x00000000, RESULT_LT); + status |= test__comparesf2(0xff000000, 0x3f800000, RESULT_LT); + status |= test__comparesf2(0xff000000, 0x7f800000, RESULT_LT); + status |= test__comparesf2(0xff000000, 0x80000000, RESULT_LT); + status |= test__comparesf2(0xff000000, 0xbf800000, RESULT_LT); + status |= test__comparesf2(0xff000000, 0xff000000, RESULT_EQ); + status |= test__comparesf2(0xff000000, 0xff800000, RESULT_GT); + status |= test__comparesf2(0xff000001, 0x7f000000, RESULT_LT); + status |= test__comparesf2(0xff000001, 0xff000000, RESULT_LT); + status |= test__comparesf2(0xff000001, 0xff000002, RESULT_GT); + status |= test__comparesf2(0xff000002, 0xfe800001, RESULT_LT); + status |= test__comparesf2(0xff7ffffe, 0x3f800000, RESULT_LT); + status |= test__comparesf2(0xff7ffffe, 0x7f7fffff, RESULT_LT); + status |= test__comparesf2(0xff7ffffe, 0xbf800000, RESULT_LT); + status |= 
test__comparesf2(0xff7ffffe, 0xff7fffff, RESULT_GT); + status |= test__comparesf2(0xff7fffff, 0x00000001, RESULT_LT); + status |= test__comparesf2(0xff7fffff, 0x3f800000, RESULT_LT); + status |= test__comparesf2(0xff7fffff, 0x7f919cff, RESULT_UN); + status |= test__comparesf2(0xff7fffff, 0x7fd729a7, RESULT_UN); + status |= test__comparesf2(0xff7fffff, 0x80000001, RESULT_LT); + status |= test__comparesf2(0xff7fffff, 0xbf800000, RESULT_LT); + status |= test__comparesf2(0xff7fffff, 0xff7fffff, RESULT_EQ); + status |= test__comparesf2(0xff800000, 0x00000000, RESULT_LT); + status |= test__comparesf2(0xff800000, 0x00000001, RESULT_LT); + status |= test__comparesf2(0xff800000, 0x007fffff, RESULT_LT); + status |= test__comparesf2(0xff800000, 0x7f000000, RESULT_LT); + status |= test__comparesf2(0xff800000, 0x7f7fffff, RESULT_LT); + status |= test__comparesf2(0xff800000, 0x7f800000, RESULT_LT); + status |= test__comparesf2(0xff800000, 0x7fafdbc1, RESULT_UN); + status |= test__comparesf2(0xff800000, 0x7fec80fe, RESULT_UN); + status |= test__comparesf2(0xff800000, 0x80000000, RESULT_LT); + status |= test__comparesf2(0xff800000, 0x80000001, RESULT_LT); + status |= test__comparesf2(0xff800000, 0x807fffff, RESULT_LT); + status |= test__comparesf2(0xff800000, 0xff000000, RESULT_LT); + status |= test__comparesf2(0xff800000, 0xff7fffff, RESULT_LT); + status |= test__comparesf2(0xff800000, 0xff800000, RESULT_EQ); + + return status; +} _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
