https://github.com/statham-arm created https://github.com/llvm/llvm-project/pull/179928
This commit adds a total of 8 new functions, all converting an integer to a floating-point number, varying in 3 independent choices: * input integer size (32-bit or 64-bit) * input integer type (signed or unsigned) * output float format (32-bit or 64-bit) The two conversions of 64-bit integer to 32-bit float live in the same source file, to save code size, since that conversion is one of the more complicated ones and the two functions can share most of their code, with only a few instructions differing at the start to handle negative numbers (or not). >From b5bb324fffbbb1725c89d87cd1845f29f8dba24d Mon Sep 17 00:00:00 2001 From: Simon Tatham <[email protected]> Date: Thu, 29 Jan 2026 16:14:34 +0000 Subject: [PATCH] [compiler-rt][ARM] Optimized integer -> FP conversions This commit adds a total of 8 new functions, all converting an integer to a floating-point number, varying in 3 independent choices: * input integer size (32-bit or 64-bit) * input integer type (signed or unsigned) * output float format (32-bit or 64-bit) The two conversions of 64-bit integer to 32-bit float live in the same source file, to save code size, since that conversion is one of the more complicated ones and the two functions can share most of their code, with only a few instructions differing at the start to handle negative numbers (or not). 
--- compiler-rt/lib/builtins/CMakeLists.txt | 8 + compiler-rt/lib/builtins/arm/floatdidf.S | 210 ++++++++++++++++++ compiler-rt/lib/builtins/arm/floatdisf.S | 200 +++++++++++++++++ compiler-rt/lib/builtins/arm/floatsidf.S | 72 ++++++ compiler-rt/lib/builtins/arm/floatsisf.S | 113 ++++++++++ compiler-rt/lib/builtins/arm/floatunsdidf.S | 180 +++++++++++++++ compiler-rt/lib/builtins/arm/floatunssidf.S | 64 ++++++ compiler-rt/lib/builtins/arm/floatunssisf.S | 103 +++++++++ .../test/builtins/Unit/floatdidfnew_test.c | 68 ++++++ .../test/builtins/Unit/floatdisfnew_test.c | 59 +++++ .../test/builtins/Unit/floatsidfnew_test.c | 43 ++++ .../test/builtins/Unit/floatsisfnew_test.c | 57 +++++ .../test/builtins/Unit/floatunsdidfnew_test.c | 57 +++++ .../test/builtins/Unit/floatunsdisfnew_test.c | 51 +++++ .../test/builtins/Unit/floatunssidfnew_test.c | 41 ++++ .../test/builtins/Unit/floatunssisfnew_test.c | 55 +++++ 16 files changed, 1381 insertions(+) create mode 100644 compiler-rt/lib/builtins/arm/floatdidf.S create mode 100644 compiler-rt/lib/builtins/arm/floatdisf.S create mode 100644 compiler-rt/lib/builtins/arm/floatsidf.S create mode 100644 compiler-rt/lib/builtins/arm/floatsisf.S create mode 100644 compiler-rt/lib/builtins/arm/floatunsdidf.S create mode 100644 compiler-rt/lib/builtins/arm/floatunssidf.S create mode 100644 compiler-rt/lib/builtins/arm/floatunssisf.S create mode 100644 compiler-rt/test/builtins/Unit/floatdidfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatdisfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatsidfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatsisfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatunsdidfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatunsdisfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatunssidfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatunssisfnew_test.c diff --git 
a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 5cf754135908b..ed6a9d00db069 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -465,6 +465,13 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm") arm/fixunsdfsi.S arm/fixunssfdi.S arm/fixunssfsi.S + arm/floatdidf.S + arm/floatdisf.S + arm/floatsidf.S + arm/floatsisf.S + arm/floatunsdidf.S + arm/floatunssidf.S + arm/floatunssisf.S arm/truncdfsf2.S ) set_source_files_properties(${assembly_files} @@ -478,6 +485,7 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm") arm/dnorm2.c arm/dunder.c ) + set_property(SOURCE arm/floatdisf.S DIRECTORY ${COMPILER_RT_SOURCE_DIR} PROPERTY crt_provides floatunsdisf) endif() endif() diff --git a/compiler-rt/lib/builtins/arm/floatdidf.S b/compiler-rt/lib/builtins/arm/floatdidf.S new file mode 100644 index 0000000000000..6c8875209d635 --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatdidf.S @@ -0,0 +1,210 @@ +//===-- floatdidf.S - 64-bit signed int to double-precision FP conversion -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatdidf function (64-bit signed integer to +// double precision floating point conversion), with the IEEE-754 default +// rounding (to nearest, ties to even), for the Arm and Thumb2 ISAs. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +#include "endian.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatdidf) + push {r4, lr} + bl __aeabi_l2d + VMOV_TO_DOUBLE(d0, r0, r1) + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatdidf, __aeabi_l2d) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_l2d) + + // Start by finding out the sign of the input. This instruction sets up r2 to + // be 0xFFFFFFFF for a _positive_ number, and 0 for a negative number. + mvns r2, xh, asr #31 + + // Now take the absolute value, by negating the input if it was negative + // (which we can tell from the flags set by that MVNS). + // + // Because r2 = 0 in the case where we need to negate, we can do the negation + // using SUB/SBC instead of the more obvious RSB/RSC, which saves having to + // use an extra instruction in Thumb to work around RSC not being available. + bne 0f // if positive, branch round the negation + subs xl, r2, xl + sbc xh, r2, xh +0: + + // Now we just need to convert a positive integer to double, and combine it + // with the sign in r2. + // + // There are essentially three cases we need to separate. The leading bit of + // the integer is either in xh or xl; if it's in xh, it makes a difference + // whether it's above bit 20, because that's the case where we must shift + // right and potentially round. + // + // Start by assuming the high word is nonzero; if we're wrong, we'll find out + // in a few instructions' time and be able to try again. So we find the + // position of the leading bit in xh, and turn it into a left-shift count + // that will move the leading bit up to where it belongs in the output + // double. + clz r3, xh + subs r3, r3, #11 + + // If that left-shift count is negative, we're going to have to shift the + // mantissa right instead of left, and maybe round it. 
Branch out of line for + // the code that handles that case. + blo LOCAL_LABEL(shiftdown) + + // Shift xh left to bring the top word of the mantissa to the right place. By + // making this shift set the flags, we detect if xh was zero. + // + // We branch out of line if it _wasn't_ zero, on the theory that small input + // integers are likely to occur more often than large ones, so the small case + // should be the faster path. This is a bit of a compromise between large and + // small integer performance: if we wanted to prioritise small inputs above + // all else, we could have tested if xh=0 to begin with - but that would cost + // an extra instruction on the large-integer path, because it repeats work + // that this instruction can do in passing. + lsls xh, xh, r3 + bne LOCAL_LABEL(highword) + + // Now we've found out that xh=0, we need to repeat the CLZ instruction on + // xl. The simplest thing is to shift xl up by a variable distance to put its + // leading bit at the top; then we can do immediate shifts to move it up + // further to the top of the double-precision mantissa. (Otherwise you'd have + // to make a second shift count by subtracting from 32, using more registers + // and requiring more register-controlled shifts, especially awkward in + // Thumb.) + // + // There may not _be_ a leading bit in xl at all (just as there turned out + // not to have been one in xh, if we're on this path). In that case the input + // integer was 0, and so we should return double-precision 0, which + // conveniently has the same representation (xh=xl=0 already). + clz r3, xl // decide how far to shift up + lsls xh, xl, r3 // do the shift, also checking if xl = 0 + bxeq lr // if xl = 0, return zero immediately + + // Now xl contains the output mantissa, with the leading bit at the top. We + // must shift that up another 21 bits, and recombine it with an exponent + // derived from r3 (telling us how far we've already shifted up), and the + // sign derived from r2. 
+ // + // If r3=0 then the input value was in the range [2^31,2^32), so its exponent + // in double precision should be 0x41e. We want to reduce that by 1 so that + // the leading bit of the mantissa will increment it when we add it in. So + // the exponent should be 0x41d minus r3. + // + // But also, r2 is 0 for a negative number and -1 for a positive number. So + // we _set_ the sign bit on the output, by calculating the sign+exponent + // fields together as 0xc1d minus r3; then we shift r2 left and subtract it, + // which decrements the sign bit from 1 back to 0 if the number was positive + // (hence r2 had all bits set). + rsb r3, r3, #0x1d // 0x1d minus shift count + add r3, r3, #0xc00 // 0xc1d minus shift count + sub r3, r3, r2, lsl #11 // combine with the sign + lsr r2, xh, #11 // make top word of mantissa + lsl xl, xh, #21 // make bottom word of mantissa + add xh, r2, r3, lsl #20 // combine top of mantissa with sign+exp + bx lr + +LOCAL_LABEL(highword): + // This is the branch for numbers big enough that xh != 0, but not big enough + // to need to shift downwards and round. + // + // r3 is the distance that we've already shifted xh left by. We'll need to + // shift xl left by the same amount, and we'll also need to shift xl right by + // 32 minus that, to put some of its bits at the bottom of xh. Start by + // working out the rightward shift distance, and make the full top word of + // the mantissa. + rsb r12, r3, #32 +#if !__thumb__ + orr xh, xh, xl, lsr r12 +#else + // In Thumb we have to do the register-controlled shift and the OR in + // separate instructions. + lsr r12, xl, r12 + orr xh, xh, r12 +#endif + // Shift xl left as well, so that xh:xl are now the full output mantissa, + // with its leading bit in bit 20 of xh. + lsls xl, xl, r3 + + // Calculate the sign and exponent, and recombine them with the mantissa. 
+ // This is exactly the same method as above, except that the exponent is + // different, because this time r3 stores the offset between the original + // leading bit position and bit 20 of the mantissa, so that it's zero if the + // input is in the range [2^52,2^53), which would make the output exponent + // 0x433, or 0x432 after compensating for the leading mantissa bit. + rsb r3, r3, #0x32 // 0x32 minus shift count + add r3, r3, #0xc00 // 0xc32 minus shift count + sub r3, r3, r2, lsl #11 // combine with the sign + add xh, xh, r3, lsl #20 // and with the top word of the mantissa + bx lr + +LOCAL_LABEL(shiftdown): + // This is the branch for numbers so big that the mantissa has to be shifted + // _right_, so that some of the mantissa is shifted off the bottom and the + // number has to be rounded. + // + // r3 contains the shift count, but it's currently negative (it was + // calculated as a left shift). So it's in a good state to use for + // calculating the output exponent, and therefore we do that first, while + // it's convenient. + rsb r12, r3, #0x32 // 0x32 minus shift count + add r12, r12, #0xc00 // 0xc32 minus shift count + sub r2, r12, r2, lsl #11 // combine with the sign + + // Shift the mantissa down to the right position, capturing the bits shifted + // off the bottom at the top of r3. We'll need to temporarily push a couple + // of extra registers for this part, because we need to calculate how far to + // shift xh and xl right, but also how far to shift them left to get the bits + // shifted out of each one. + push {r4,lr} + rsb r4, r3, #0 // r4 = right-shift count + rsb lr, r4, #32 // lr = left-shift count + lsl r12, xh, lr // r12 = bits shifted out of xh + lsr xh, xh, r4 // shift xh right to make its final value + lsl r3, xl, lr // r3 = bits shifted out of xl +#if !__thumb__ + orrs xl, r12, xl, lsr r4 // shift xl right and combine with r12 +#else + // In Thumb we have to do the register-controlled shift and the OR in + // separate instructions. 
+ lsrs xl, xl, r4 + orr xl, xl, r12 +#endif + pop {r4,lr} + + // Now xh:xl contains the unrounded output mantissa; r2 contains its sign and + // exponent; and r3 contains the bits shifted off the bottom. Also, the + // single flag-setting shift in the sequence above was the one that shifted + // xl right, so the carry flag contains the bit just off the bottom, i.e. the + // bit that tells us whether we need to round up. + // + // Recombine the mantissa with the sign and exponent, and then if C is clear, + // we don't need to round up, and can return. + add xh, xh, r2, lsl #20 // put back the sign+exponent + bxcc lr // return if we don't have to round + + // We're rounding up, and we may also need to round to even. + adds xl, xl, #1 // increment the mantissa to round up + adc xh, xh, #0 // and propagate a carry if any + lsls r3, r3, #1 // set Z if we had an exact halfway case + biceq xl, xl, #1 // and round back to even if so + bx lr + +END_COMPILERRT_FUNCTION(__aeabi_l2d) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/floatdisf.S b/compiler-rt/lib/builtins/arm/floatdisf.S new file mode 100644 index 0000000000000..7048c41a01f3a --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatdisf.S @@ -0,0 +1,200 @@ +//===-- floatdisf.S - 64-bit integer to single-precision FP conversion ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatdisf and __floatunsdisf functions (64-bit +// signed and unsigned integer to single precision floating point conversion), +// with the IEEE-754 default rounding (to nearest, ties to even), for the Arm +// and Thumb2 ISAs. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +#include "endian.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatdisf) + push {r4, lr} + bl __aeabi_l2f + vmov s0, r0 + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatdisf, __aeabi_l2f) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_l2f) + + // Isolate the input integer's sign bit in r2, and if the input is negative, + // negate it. + ands r2, xh, #0x80000000 + bpl 0f // if positive, skip the negation + rsbs xl, xl, #0 +#if !__thumb__ + rsc xh, xh, #0 +#else + // Thumb has no RSC, so simulate it by bitwise inversion and then ADC + mvn xh, xh + adc xh, xh, #0 +#endif +0: + + // Combine the sign in r2 with the FP exponent of 1. So r2 now holds the + // single-precision encoding of +1 or -1 as appropriate. + orr r2, r2, #0x3f800000 + + // Now we have a positive 64-bit integer in xh,xl, and a starting sign + + // exponent in r2. + // + // We also come here from the unsigned-integer conversion function below, so + // we must handle xh,xl having any possible values at all, even 2^63 or + // greater. +LOCAL_LABEL(normalise): + + // Add 30 to the exponent in r2, so that it holds +2^30 or -2^30. The idea is + // that after we normalise the input integer into a FP mantissa with the + // topmost 1 in bit 23, adding that will increment by one more, so that this + // exponent will be correct if the input has its high bit in bit 31. We'll + // decrease the exponent if CLZ returns a positive value, and increment it by + // 32 if the high word is used. + // + // You might ask why we didn't set up r2 to have this value in the first + // place, by ORRing the sign bit with 0x4e800000 instead of 0x3f800000. The + // answer is because 0x4e800000 can't be represented in the immediate field + // of an AArch32 data-processing instruction, so we can't avoid using two + // instructions. 
+ add r2, r2, #30 << 23 + + // Start setting up r3 to be the exponent adjustment, and set xh to be the + // highest _nonzero_ word of the input. If xh = 0, set r3 = 0 and copy xl + // (the only nonzero input word) into xh; if xh != 0, set r3 = 32. + movs r3, xh // sets r3=0 if xh=0, testing at the same time + movne r3, #32 // if that didn't happen, set r3=32 + movseq xh, xl // and otherwise, copy xl into xh + + // Using a MOVS for the final copy has the side effect that we've also just + // tested whether xh = xl = 0. If so, then the entire input value was zero, + // so we should return 0. Conveniently, that's the value in both xl and xh + // right now, so no matter which of those is r0 (which varies with + // endianness) we can just return. + bxeq lr + + // Now we know xh contains the highest set bit of the input. Find that bit, + // shift it up to the top of the word, and adjust the shift count + // appropriately. + // + // After this, r3 contains the full exponent adjustment we'll need to add to + // the starting exponent in r2: it takes values from -31 (if the input was 1) + // to +32 (if the input was 2^63 or bigger). + clz r12, xh + mov xh, xh, lsl r12 + sub r3, r3, r12 + + // If the input integer is < 2^32, then we've now set up xh to be the full + // output mantissa (with its leading bit at the top of the word). If not, + // then we still need to add some bits from xl. + // + // We don't need to spend an instruction on deciding which: it's enough to + // just shift xl right by whatever is in r3. In the case where we don't want + // it (because the bits in xl are already in the output mantissa), r3 <= 0. + // If r3 = 0 (the input was an exactly 32-bit integer) then the bits in xl + // will exactly overlay the ones already in xh and make no difference; if r3 + // < 0 then the AArch32 shift instruction semantics will treat it as a shift + // of more than 32 bits, shifting xl right off the bottom of the word, and + // again not modify xh. 
+#if !__thumb__ + orr xh, xh, xl, lsr r3 // if shift negative then xh unaltered +#else + // Thumb can't fold a register-controlled shift into an ORR, so we must use + // two separate instructions. + lsr r12, xl, r3 + orr xh, xh, r12 +#endif + + // Combine the exponent adjustment in r3 with the starting exponent and sign + // in r2. These parts of the output are now ready to combine with the + // mantissa, once we've shifted it down and rounded it. + add r2, r2, r3, lsl #23 + + // Now we must round. The mantissa in xh contains the top 32 bits of the + // full result, including the bit we're going to shift just off the bottom + // (which controls the basic 'round up or down?' question). So we can start + // by checking those, which will handle most cases. + + // This shift moves the round bit off the top of xh into the carry flag, so + // that C is set if we're rounding up. It also sets Z if all the bits below + // that are zero, which _might_ mean we need to round to even, but only if + // the further bits in xl are also zero. But if Z is _not_ set then we can + // return without checking xl. + lsls r12, xh, #25 + +#ifndef __BIG_ENDIAN__ + // We're about to overwrite r0 with the preliminary output. This will be our + // last use of xh, but we still need xl later. So in little-endian mode, + // where xl _is_ r0, we must spend an extra instruction on saving it. + mov r12, xl +#endif + + // Recombine the mantissa (shifted down to the right position) with the sign + // and exponent in r2. Using ADC also rounds up if C is set. + adc r0, r2, xh, lsr #8 + + // If C was clear, we didn't round up, so we don't need to undo that by + // rounding to even. And if Z was clear, we're not rounding to even anyway. + // So in either case, we're done. + bxcc lr + bxne lr + + // The slow path: nothing in the top 32 bits of the mantissa ruled out having + // to round to even. Now we must check the rest of the mantissa bits in xl. 
+ // + // This RSB instruction converts the previous exponent adjustment value (-31 + // for smallest integer, +32 for largest) into a value from 0 (_largest_ + // integer) to 63 (smallest). So if the integer occupied n bits of xh, then + // 32-n bits of xl ended up in the initial mantissa word, so shifting xl left + // by 32-n will catch precisely the bits of xl that didn't. And if the + // integer was entirely in xl, then this shift count will be >=32, so the + // left shift will throw away all of xl. + rsb r3, r3, #32 + + // Shift xl to include just the shifted-off bits, setting Z if they're all + // zero. Then we know whether to round to even by clearing bit 0 of the + // output. +#ifdef __BIG_ENDIAN__ + lsls r12, xl, r3 // the low word is still in xl itself +#else + lsls r12, r12, r3 // we moved it into r12 earlier +#endif + biceq r0, r0, #1 + + // And whether we did that or not, we're finished. + bx lr + +END_COMPILERRT_FUNCTION(__aeabi_l2f) + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatunsdisf) + push {r4, lr} + bl __aeabi_ul2f + vmov s0, r0 + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatunsdisf, __aeabi_ul2f) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_ul2f) + // Jump to LOCAL_LABEL(normalise) above, without negating the input, and + // having set up r2 unconditionally to indicate that a positive output is + // wanted. + mov r2, #0x3f800000 + b LOCAL_LABEL(normalise) + +END_COMPILERRT_FUNCTION(__aeabi_ul2f) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/floatsidf.S b/compiler-rt/lib/builtins/arm/floatsidf.S new file mode 100644 index 0000000000000..22d9c572fff44 --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatsidf.S @@ -0,0 +1,72 @@ +//===-- floatsidf.S - 32-bit signed int to double-precision FP conversion -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatsidf function (32-bit signed integer to +// double precision floating point conversion), with the IEEE-754 default +// rounding (to nearest, ties to even), for the Arm and Thumb2 ISAs. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +#include "endian.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatsidf) + push {r4, lr} + bl __aeabi_i2d + VMOV_TO_DOUBLE(d0, r0, r1) + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatsidf, __aeabi_i2d) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_i2d) + + // Start by separating the input into its sign and its absolute value. + ands r2, r0, #0x80000000 // get sign + rsbmi r0, r0, #0 // negate input if it was negative + + // Shift the leading bit of the input to the top of xh. In the + // process, we detect a zero input, and branch out of line for it. + clz r3, r0 + lsls xh, r0, r3 + beq LOCAL_LABEL(zero) + + // Use the shift count in r3 to calculate the output exponent, and combine it + // with the sign bit in r2. + // + // If r3=0 then the input value was in the range [2^31,2^32), so its exponent + // in double precision should be 0x41e. We want to reduce that by 1 so that + // the leading bit of the mantissa will increment it when we add it in. So + // the exponent should be 0x41d minus r3. + // + // (For a signed integer, the only possible value in that range would be + // 2^31, obtained as the absolute value of an original input of -2^31.) 
+ rsb r3, r3, #0x1d // 0x1d minus shift count + add r3, r3, #0x400 // 0x41d minus shift count + add r2, r2, r3, lsl #20 // r2 is now sign+exponent + + // Shift the mantissa left and right to get the parts that go in xl and xh, + // and combine with the sign and exponent we just computed. + lsl xl, xh, #21 // low bits of mantissa + add xh, r2, xh, lsr #11 // high bits of mantissa + sign + exponent + bx lr + +LOCAL_LABEL(zero): + // We come here if the input was zero. We've just set xh to 0, so we + // only need to set xl to 0 too and return. + mov xl, #0 + bx lr + +END_COMPILERRT_FUNCTION(__aeabi_i2d) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/floatsisf.S b/compiler-rt/lib/builtins/arm/floatsisf.S new file mode 100644 index 0000000000000..e7d474f81572b --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatsisf.S @@ -0,0 +1,113 @@ +//===-- floatsisf.S - 32-bit signed int to single-precision FP conversion -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatsisf function (32-bit signed integer to +// single precision floating point conversion), with the IEEE-754 default +// rounding (to nearest, ties to even), for the Arm and Thumb2 ISAs. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatsisf) + push {r4, lr} + bl __aeabi_i2f + vmov s0, r0 + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatsisf, __aeabi_i2f) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_i2f) + + // Isolate the input integer's sign bit in r1, and if the input is negative, + // negate it. 
+ ands r1, r0, #0x80000000 + rsbmi r0, r0, #0 + + // Find the highest set bit of the input, and shift it up to the top bit. r3 + // contains the amount we shifted by, and r2 the shifted value. + clz r3, r0 + lsls r2, r0, r3 + + // Convert the shift distance into the exponent of the output float. The + // exponent for an integer with bit 31 set should be 0x7f (the IEEE exponent + // bias) plus 31, which is 0x9e. Here we reduce that by 1, because when we + // add the mantissa to it, the leading mantissa bit will increment it. + rsb r3, r3, #0x9d + + // A side effect of the LSLS above was to set the Z flag if the input integer + // was actually zero. In that situation, we can just return immediately, + // because r0 still _contains_ the input integer, which has the same + // representation as the floating-point +0 that we should return. +#if !__thumb__ + bxeq lr +#else + // In Thumb, we do the conditional return by branching to a return + // instruction later in the function. This costs more time in the case where + // the return is taken, but saves an IT in the case where it's not, and we + // assume that nonzero integers are converted to FP more often than zero is. + // (This also improves the _worst-case_ running time, because the nonzero + // code path is the limiting factor.) + beq LOCAL_LABEL(return) +#endif + + // Shift the exponent up to its final bit position, and combine it with the + // sign. + orrs r1, r1, r3, lsl #23 + + // Recombine the mantissa with the sign+exponent, and round. This is done + // differently between Arm and Thumb. +#if !__thumb__ + // Arm rounding sequence: shift the round bit off the top of r2 into C, and + // simultaneously set Z if the lower-down bits are all zero. + lsls r3, r2, #25 + // Recombine mantissa with sign+exponent, using ADC so that this also adds 1 + // if we're rounding up. + adc r0, r1, r2, lsr #8 + // If C was clear, we can't possibly need to round to even, so return. 
+ bxcc lr + // Now we've definitely rounded up, and if Z is set, round to even. + biceq r0, r0, #1 + bx lr +#else + // Thumb rounding sequence: we do things in a slightly different order, by + // recombining first with plain ADD, and _then_ testing the round bit. On + // simple M-profile CPUs like Cortex-M3, this avoids the IT instruction + // (inserted before BXCC lr) costing a cycle, because it immediately follows + // a 16-bit LSLS instruction, so the CPU had already fetched it. + // + // So we save a cycle in the case where we don't round up, at the cost of a + // cycle in the case where we do (requiring a separate ADD instruction after + // the BXCC lr isn't taken). We expect that this is a good trade, on the + // theory that _most_ integers converted into floating point are not large + // enough to need rounding at all, so all the exact cases _and_ half the + // inexact ones will benefit from the saving. + add r0, r1, r2, lsr #8 // r0 is now sign+exp+mant, unrounded +#if __thumb__ + lsls.N r3, r2, #25 // .N to make sure it's assembled as 16-bit +#else + lsls r3, r2, #25 // GNU assembler treats .N as an error in Arm +#endif + bxcc lr + // Now if we didn't take the return, we must definitely round up, and + // conditionally round to even. + add r0, r0, #1 + biceq r0, r0, #1 +LOCAL_LABEL(return): // label we branch to from the 0 case above + bx lr +#endif + +END_COMPILERRT_FUNCTION(__aeabi_i2f) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/floatunsdidf.S b/compiler-rt/lib/builtins/arm/floatunsdidf.S new file mode 100644 index 0000000000000..8c368df37169c --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatunsdidf.S @@ -0,0 +1,180 @@ +//===-- floatunsdidf.S - 64-bit unsigned int to double-precision FP conversion// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatunsdidf function (64-bit unsigned integer to +// double precision floating point conversion), with the IEEE-754 default +// rounding (to nearest, ties to even), for the Arm and Thumb2 ISAs. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +#include "endian.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatunsdidf) + push {r4, lr} + bl __aeabi_ul2d + VMOV_TO_DOUBLE(d0, r0, r1) + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatunsdidf, __aeabi_ul2d) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_ul2d) + + // There are essentially three cases we need to separate. The leading bit of + // the integer is either in xh or xl; if it's in xh, it makes a difference + // whether it's above bit 20, because that's the case where we must shift + // right and potentially round. + // + // Start by assuming the high word is nonzero; if we're wrong, we'll find out + // in a few instructions' time and be able to try again. So we find the + // position of the leading bit in xh, and turn it into a left-shift count + // that will move the leading bit up to where it belongs in the output + // double. + clz r3, xh + subs r3, r3, #11 + + // If that left-shift count is negative, we're going to have to shift the + // mantissa right instead of left, and maybe round it. Branch out of line for + // the code that handles that case. + blo LOCAL_LABEL(shiftdown) + + // Shift xh left to bring the top word of the mantissa to the right place. By + // making this shift set the flags, we detect if xh was zero. 
+ // + // We branch out of line if it _wasn't_ zero, on the theory that small input + // integers are likely to occur more often than large ones, so the small case + // should be the faster path. This is a bit of a compromise between large and + // small integer performance: if we wanted to prioritise small inputs above + // all else, we could have tested if xh=0 to begin with - but that would cost + // an extra instruction on the large-integer path, because it repeats work + // that this instruction can do in passing. + lsls xh, xh, r3 + bne LOCAL_LABEL(highword) + + // Now we've found out that xh=0, we need to repeat the CLZ instruction on + // xl. The simplest thing is to shift xl up by a variable distance to put its + // leading bit at the top; then we can do immediate shifts to move it up + // further to the top of the double-precision mantissa. (Otherwise you'd have + // to make a second shift count by subtracting from 32, using more registers + // and requiring more register-controlled shifts, especially awkward in + // Thumb.) + // + // There may not _be_ a leading bit in xl at all (just as there turned out + // not to have been one in xh, if we're on this path). In that case the input + // integer was 0, and so we should return double-precision 0, which + // conveniently has the same representation (xh=xl=0 already). + clz r3, xl // decide how far to shift up + lsls xh, xl, r3 // do the shift, also checking if xl = 0 + bxeq lr // if xl = 0, return zero immediately + + // Now xl contains the output mantissa, with the leading bit at the top. We + // must shift that up another 21 bits, and recombine it with an exponent + // derived from r3 (telling us how far we've already shifted up). + // + // If r3=0 then the input value was in the range [2^31,2^32), so its exponent + // in double precision should be 0x41e. We want to reduce that by 1 so that + // the leading bit of the mantissa will increment it when we add it in. 
So + // the exponent should be 0x41d minus r3. + rsb r3, r3, #0x1d // 0x1d minus shift count + add r3, r3, #0x400 // 0x41d minus shift count + lsr r2, xh, #11 // make top word of mantissa + lsl xl, xh, #21 // make bottom word of mantissa + add xh, r2, r3, lsl #20 // and combine it with exponent + bx lr + +LOCAL_LABEL(highword): + // This is the branch for numbers big enough that xh != 0, but not big enough + // to need to shift downwards and round. + // + // r3 is the distance that we've already shifted xh left by. We'll need to + // shift xl left by the same amount, and we'll also need to shift xl right by + // 32 minus that, to put some of its bits at the bottom of xh. + rsb r12, r3, #32 +#if !__thumb__ + orr xh, xh, xl, lsr r12 +#else + // In Thumb we have to do the register-controlled shift and the OR in + // separate instructions. + lsr r12, xl, r12 + orr xh, xh, r12 +#endif + // Shift xl left as well, so that xh:xl are now the full output mantissa, + // with its leading bit in bit 20 of xh. + lsls xl, xl, r3 + + // Calculate the exponent, and recombine it with the mantissa. This is + // exactly the same method as above, except that the exponent is different, + // because this time r3 stores the offset between the original leading bit + // position and bit 20 of the mantissa, so that it's zero if the input is in + // the range [2^52,2^53), which would make the output exponent 0x433, or + // 0x432 after compensating for the leading mantissa bit. + rsb r3, r3, #0x32 // 0x32 minus shift count + add r3, r3, #0x400 // 0x432 minus shift count + add xh, xh, r3, lsl #20 // combine with the top word of the mantissa + bx lr + +LOCAL_LABEL(shiftdown): + // This is the branch for numbers so big that the mantissa has to be shifted + // _right_, so that some of the mantissa is shifted off the bottom and the + // number has to be rounded. + // + // r3 contains the shift count, but it's currently negative (it was + // calculated as a left shift). 
So it's in a good state to use for + // calculating the output exponent, and therefore we do that first, while + // it's convenient. + rsb r2, r3, #0x32 // 0x32 minus shift count + add r2, r2, #0x400 // 0x432 minus shift count + + // Shift the mantissa down to the right position, capturing the bits shifted + // off the bottom at the top of r3. We'll need to temporarily push a couple + // of extra registers for this part, because we need to calculate how far to + // shift xh and xl right, but also how far to shift them left to get the bits + // shifted out of each one. + push {r4,lr} + rsb r4, r3, #0 // r4 = right-shift count + rsb lr, r4, #32 // lr = left-shift count + lsl r12, xh, lr // r12 = bits shifted out of xh + lsr xh, xh, r4 // shift xh right to make its final value + lsl r3, xl, lr // r3 = bits shifted out of xl +#if !__thumb__ + orrs xl, r12, xl, lsr r4 // shift xl right and combine with r12 +#else + // In Thumb we have to do the register-controlled shift and the OR in + // separate instructions. + lsrs xl, xl, r4 + orr xl, xl, r12 +#endif + pop {r4,lr} + + // Now xh:xl contains the unrounded output mantissa; r2 contains its + // exponent; and r3 contains the bits shifted off the bottom. Also, the + // single flag-setting shift in the sequence above was the one that shifted + // xl right, so the carry flag contains the bit just off the bottom, i.e. the + // bit that tells us whether we need to round up. + // + // Recombine the mantissa with the exponent, and then if C is clear, we don't + // need to round up, and can return. + add xh, xh, r2, lsl #20 // put back the exponent + bxcc lr // return if we don't have to round + + // We're rounding up, and we may also need to round to even. 
+ adds xl, xl, #1 // increment the mantissa to round up + adc xh, xh, #0 // and propagate a carry if any + lsls r3, r3, #1 // set Z if we had an exact halfway case + biceq xl, xl, #1 // and round back to even if so + bx lr + +END_COMPILERRT_FUNCTION(__aeabi_ul2d) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/floatunssidf.S b/compiler-rt/lib/builtins/arm/floatunssidf.S new file mode 100644 index 0000000000000..33976ff357832 --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatunssidf.S @@ -0,0 +1,64 @@ +//===-- floatunssidf.S - 32-bit unsigned int to double-precision FP conversion// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatunssidf function (32-bit unsigned integer to +// double precision floating point conversion), with the IEEE-754 default +// rounding (to nearest, ties to even), for the Arm and Thumb2 ISAs. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +#include "endian.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatunssidf) + push {r4, lr} + bl __aeabi_ui2d + VMOV_TO_DOUBLE(d0, r0, r1) + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatunssidf, __aeabi_ui2d) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_ui2d) + + // Shift the leading bit of the input to the top of xh. In the + // process, we detect a zero input, and branch out of line for it. + clz r3, r0 + lsls xh, r0, r3 + beq LOCAL_LABEL(zero) + + // Use the shift count in r3 to calculate the output exponent. + // + // If r3=0 then the input value was in the range [2^31,2^32), so its exponent + // in double precision should be 0x41e. 
We want to reduce that by 1 so that + // the leading bit of the mantissa will increment it when we add it in. So + // the exponent should be 0x41d minus r3. + rsb r3, r3, #0x1d // 0x1d minus shift count + add r3, r3, #0x400 // 0x41d minus shift count + lsl r2, r3, #20 // r2 is now shifted exponent + + // Shift the mantissa left and right to get the parts that go in xl and xh, + // and combine with the exponent we just computed. + lsl xl, xh, #21 // low bits of mantissa + add xh, r2, xh, lsr #11 // high bits of mantissa + exponent + bx lr + +LOCAL_LABEL(zero): + // We come here if the input was zero. We've just set xh to 0, so we + // only need to set xl to 0 too and return. + mov xl, #0 + bx lr + +END_COMPILERRT_FUNCTION(__aeabi_ui2d) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/floatunssisf.S b/compiler-rt/lib/builtins/arm/floatunssisf.S new file mode 100644 index 0000000000000..6f6f3281adf16 --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatunssisf.S @@ -0,0 +1,103 @@ +//===-- floatunssisf.S - 32-bit unsigned int to single-precision FP conversion// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatunssisf function (32-bit unsigned integer to +// single precision floating point conversion), with the IEEE-754 default +// rounding (to nearest, ties to even), for the Arm and Thumb2 ISAs. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatunssisf) + push {r4, lr} + bl __aeabi_ui2f + vmov s0, r0 + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatunssisf, __aeabi_ui2f) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_ui2f) + + // Find the highest set bit of the input, and shift it up to the top bit. r3 + // contains the amount we shifted by, and r2 the shifted value. + clz r3, r0 + lsls r2, r0, r3 + + // Convert the shift distance into the exponent of the output float. The + // exponent for an integer with bit 31 set should be 0x7f (the IEEE exponent + // bias) plus 31, which is 0x9e. Here we reduce that by 1, because when we + // add the mantissa to it, the leading mantissa bit will increment it. + rsb r3, r3, #0x9d + + // A side effect of the LSLS above was to set the Z flag if the input integer + // was actually zero. In that situation, we can just return immediately, + // because r0 still _contains_ the input integer, which has the same + // representation as the floating-point +0 that we should return. +#if !__thumb__ + bxeq lr +#else + // In Thumb, we do the conditional return by branching to a return + // instruction later in the function. This costs more time in the case where + // the return is taken, but saves an IT in the case where it's not, and we + // assume that nonzero integers are converted to FP more often than zero is. + // (This also improves the _worst-case_ running time, because the nonzero + // code path is the limiting factor.) + beq LOCAL_LABEL(return) +#endif + + // Shift the exponent up to its final bit position. + lsls r1, r3, #23 + + // Recombine the mantissa with the exponent, and round. This is done + // differently between Arm and Thumb. 
+#if !__thumb__ + // Arm rounding sequence: shift the round bit off the top of r2 into C, and + // simultaneously set Z if the lower-down bits are all zero. + lsls r3, r2, #25 + // Recombine mantissa with exponent, using ADC so that this also adds 1 if + // we're rounding up. + adc r0, r1, r2, lsr #8 + // If C was clear, we can't possibly need to round to even, so return. + bxcc lr + // Now we've definitely rounded up, and if Z is set, round to even. + biceq r0, r0, #1 + bx lr +#else + // Thumb rounding sequence: we do things in a slightly different order, by + // recombining first with plain ADD, and _then_ testing the round bit. On + // simple M-profile CPUs like Cortex-M3, this avoids the IT instruction + // (inserted before BXCC lr) costing a cycle, because it immediately follows + // a 16-bit LSLS instruction, so the CPU had already fetched it. + // + // So we save a cycle in the case where we don't round up, at the cost of a + // cycle in the case where we do (requiring a separate ADD instruction after + // the BXCC lr isn't taken). We expect that this is a good trade, on the + // theory that _most_ integers converted into floating point are not large + // enough to need rounding at all, so all the exact cases _and_ half the + // inexact ones will benefit from the saving. + add r0, r1, r2, lsr #8 // r0 is now exp+mant, unrounded + lsls.N r3, r2, #25 // .N to make sure it's assembled as 16-bit + bxcc lr + // Now if we didn't take the return, we must definitely round up, and + // conditionally round to even. 
+ add r0, r0, #1 + biceq r0, r0, #1 +LOCAL_LABEL(return): // label we branch to from the 0 case above + bx lr +#endif + +END_COMPILERRT_FUNCTION(__aeabi_ui2f) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/test/builtins/Unit/floatdidfnew_test.c b/compiler-rt/test/builtins/Unit/floatdidfnew_test.c new file mode 100644 index 0000000000000..601be3f573263 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatdidfnew_test.c @@ -0,0 +1,68 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatdidf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from int64_t to double +COMPILER_RT_ABI double __floatdidf(int64_t a); + +int test__floatdidf(int line, uint64_t a, uint64_t expected_rep) { + double x = __floatdidf(a); + int ret = compareResultD(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatdidf(%016" PRIx64 ") = %016" PRIx64 + ", expected %016" PRIx64 "\n", + line, a, toRep64(x), expected_rep); + } + return ret; +} + +#define test__floatdidf(a,x) test__floatdidf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatdidf(0x0000000000000000, 0x0000000000000000); + status |= test__floatdidf(0x0000000000000001, 0x3ff0000000000000); + status |= test__floatdidf(0x0000000000000001, 0x3ff0000000000000); + status |= test__floatdidf(0x0000000080000000, 0x41e0000000000000); + status |= test__floatdidf(0x0000000080000001, 0x41e0000000200000); + status |= test__floatdidf(0x0000000080000003, 0x41e0000000600000); + status |= test__floatdidf(0x0000000080000007, 0x41e0000000e00000); + status |= test__floatdidf(0x00000000fffffff8, 0x41efffffff000000); + status |= test__floatdidf(0x00000000fffffffc, 0x41efffffff800000); + status |= 
test__floatdidf(0x00000000fffffffe, 0x41efffffffc00000); + status |= test__floatdidf(0x00000000ffffffff, 0x41efffffffe00000); + status |= test__floatdidf(0x0000082345670000, 0x42a0468ace000000); + status |= test__floatdidf(0x0100000000000000, 0x4370000000000000); + status |= test__floatdidf(0x0100000000000004, 0x4370000000000000); + status |= test__floatdidf(0x0100000000000008, 0x4370000000000000); + status |= test__floatdidf(0x010000000000000c, 0x4370000000000001); + status |= test__floatdidf(0x0100000000000010, 0x4370000000000001); + status |= test__floatdidf(0x0100000000000014, 0x4370000000000001); + status |= test__floatdidf(0x0100000000000018, 0x4370000000000002); + status |= test__floatdidf(0x010000000000001c, 0x4370000000000002); + status |= test__floatdidf(0x7fffffffffffffff, 0x43e0000000000000); + status |= test__floatdidf(0x8000000000000000, 0xc3e0000000000000); + status |= test__floatdidf(0x8000000000000001, 0xc3e0000000000000); + status |= test__floatdidf(0xfeffffffffffffe4, 0xc370000000000002); + status |= test__floatdidf(0xfeffffffffffffe8, 0xc370000000000002); + status |= test__floatdidf(0xfeffffffffffffec, 0xc370000000000001); + status |= test__floatdidf(0xfefffffffffffff0, 0xc370000000000001); + status |= test__floatdidf(0xfefffffffffffff4, 0xc370000000000001); + status |= test__floatdidf(0xfefffffffffffff8, 0xc370000000000000); + status |= test__floatdidf(0xfefffffffffffffc, 0xc370000000000000); + status |= test__floatdidf(0xff00000000000000, 0xc370000000000000); + status |= test__floatdidf(0xffe9ef445b91437b, 0xc33610bba46ebc85); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatdisfnew_test.c b/compiler-rt/test/builtins/Unit/floatdisfnew_test.c new file mode 100644 index 0000000000000..66a7193763b66 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatdisfnew_test.c @@ -0,0 +1,59 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatdisf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from int64_t to float +COMPILER_RT_ABI float __floatdisf(int64_t a); + +int test__floatdisf(int line, uint64_t a, uint32_t expected_rep) { + float x = __floatdisf(a); + int ret = compareResultF(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatdisf(%016" PRIx64 ") = %08" PRIx32 + ", expected %08" PRIx32 "\n", + line, a, toRep32(x), expected_rep); + } + return ret; +} + +#define test__floatdisf(a,x) test__floatdisf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatdisf(0x0000000000000000, 0x00000000); + status |= test__floatdisf(0x0000000000000001, 0x3f800000); + status |= test__floatdisf(0x0000000008000000, 0x4d000000); + status |= test__floatdisf(0x0000000008000004, 0x4d000000); + status |= test__floatdisf(0x0000000008000008, 0x4d000000); + status |= test__floatdisf(0x000000000800000c, 0x4d000001); + status |= test__floatdisf(0x0000000008000010, 0x4d000001); + status |= test__floatdisf(0x0000000008000014, 0x4d000001); + status |= test__floatdisf(0x0000000008000018, 0x4d000002); + status |= test__floatdisf(0x000000000800001c, 0x4d000002); + status |= test__floatdisf(0x0000082345000000, 0x55023450); + status |= test__floatdisf(0x4000004000000001, 0x5e800001); + status |= test__floatdisf(0x7fffffffffffffff, 0x5f000000); + status |= test__floatdisf(0x8000000000000000, 0xdf000000); + status |= test__floatdisf(0x8000000000000001, 0xdf000000); + status |= test__floatdisf(0xfffffffff7ffffe4, 0xcd000002); + status |= test__floatdisf(0xfffffffff7ffffe8, 0xcd000002); + status |= test__floatdisf(0xfffffffff7ffffec, 0xcd000001); + status |= test__floatdisf(0xfffffffff7fffff0, 0xcd000001); + status |= 
test__floatdisf(0xfffffffff7fffff4, 0xcd000001); + status |= test__floatdisf(0xfffffffff7fffff8, 0xcd000000); + status |= test__floatdisf(0xfffffffff7fffffc, 0xcd000000); + status |= test__floatdisf(0xfffffffff8000000, 0xcd000000); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatsidfnew_test.c b/compiler-rt/test/builtins/Unit/floatsidfnew_test.c new file mode 100644 index 0000000000000..55ccca3c8d2d1 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatsidfnew_test.c @@ -0,0 +1,43 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatsidf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from int32_t to double +COMPILER_RT_ABI double __floatsidf(int32_t a); + +int test__floatsidf(int line, uint32_t a, uint64_t expected_rep) { + double x = __floatsidf(a); + int ret = compareResultD(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatsidf(%08" PRIx32 ") = %016" PRIx64 + ", expected %016" PRIx64 "\n", + line, a, toRep64(x), expected_rep); + } + return ret; +} + +#define test__floatsidf(a,x) test__floatsidf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatsidf(0x00000000, 0x0000000000000000); + status |= test__floatsidf(0x00000001, 0x3ff0000000000000); + status |= test__floatsidf(0x40000200, 0x41d0000080000000); + status |= test__floatsidf(0x40000400, 0x41d0000100000000); + status |= test__floatsidf(0x7fffffff, 0x41dfffffffc00000); + status |= test__floatsidf(0x80000000, 0xc1e0000000000000); + status |= test__floatsidf(0x80000001, 0xc1dfffffffc00000); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatsisfnew_test.c b/compiler-rt/test/builtins/Unit/floatsisfnew_test.c new file mode 
100644 index 0000000000000..ef718f15f0214 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatsisfnew_test.c @@ -0,0 +1,57 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatsisf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from int32_t to float +COMPILER_RT_ABI float __floatsisf(int32_t a); + +int test__floatsisf(int line, uint32_t a, uint32_t expected_rep) { + float x = __floatsisf(a); + int ret = compareResultF(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatsisf(%08" PRIx32 ") = %08" PRIx32 + ", expected %08" PRIx32 "\n", + line, a, toRep32(x), expected_rep); + } + return ret; +} + +#define test__floatsisf(a,x) test__floatsisf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatsisf(0x00000000, 0x00000000); + status |= test__floatsisf(0x00000001, 0x3f800000); + status |= test__floatsisf(0x08000000, 0x4d000000); + status |= test__floatsisf(0x08000004, 0x4d000000); + status |= test__floatsisf(0x08000008, 0x4d000000); + status |= test__floatsisf(0x0800000c, 0x4d000001); + status |= test__floatsisf(0x08000010, 0x4d000001); + status |= test__floatsisf(0x08000014, 0x4d000001); + status |= test__floatsisf(0x08000018, 0x4d000002); + status |= test__floatsisf(0x0800001c, 0x4d000002); + status |= test__floatsisf(0x7fffffff, 0x4f000000); + status |= test__floatsisf(0x80000000, 0xcf000000); + status |= test__floatsisf(0x80000001, 0xcf000000); + status |= test__floatsisf(0xf7ffffe4, 0xcd000002); + status |= test__floatsisf(0xf7ffffe8, 0xcd000002); + status |= test__floatsisf(0xf7ffffec, 0xcd000001); + status |= test__floatsisf(0xf7fffff0, 0xcd000001); + status |= test__floatsisf(0xf7fffff4, 0xcd000001); + status |= 
test__floatsisf(0xf7fffff8, 0xcd000000); + status |= test__floatsisf(0xf7fffffc, 0xcd000000); + status |= test__floatsisf(0xf8000000, 0xcd000000); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatunsdidfnew_test.c b/compiler-rt/test/builtins/Unit/floatunsdidfnew_test.c new file mode 100644 index 0000000000000..7199e02580738 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatunsdidfnew_test.c @@ -0,0 +1,57 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatunsdidf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from uint64_t to double +COMPILER_RT_ABI double __floatunsdidf(uint64_t a); + +int test__floatunsdidf(int line, uint64_t a, uint64_t expected_rep) { + double x = __floatunsdidf(a); + int ret = compareResultD(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatunsdidf(%016" PRIx64 ") = %016" PRIx64 + ", expected %016" PRIx64 "\n", + line, a, toRep64(x), expected_rep); + } + return ret; +} + +#define test__floatunsdidf(a,x) test__floatunsdidf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatunsdidf(0x0000000000000000, 0x0000000000000000); + status |= test__floatunsdidf(0x0000000000000001, 0x3ff0000000000000); + status |= test__floatunsdidf(0x0000000080000000, 0x41e0000000000000); + status |= test__floatunsdidf(0x0000000080000001, 0x41e0000000200000); + status |= test__floatunsdidf(0x0000000080000003, 0x41e0000000600000); + status |= test__floatunsdidf(0x0000000080000007, 0x41e0000000e00000); + status |= test__floatunsdidf(0x00000000fffffff8, 0x41efffffff000000); + status |= test__floatunsdidf(0x00000000fffffffc, 0x41efffffff800000); + status |= test__floatunsdidf(0x00000000fffffffe, 
0x41efffffffc00000); + status |= test__floatunsdidf(0x00000000ffffffff, 0x41efffffffe00000); + status |= test__floatunsdidf(0x0000082345670000, 0x42a0468ace000000); + status |= test__floatunsdidf(0x0013d0942cab7317, 0x4333d0942cab7317); + status |= test__floatunsdidf(0x0100000000000000, 0x4370000000000000); + status |= test__floatunsdidf(0x0100000000000004, 0x4370000000000000); + status |= test__floatunsdidf(0x0100000000000008, 0x4370000000000000); + status |= test__floatunsdidf(0x010000000000000c, 0x4370000000000001); + status |= test__floatunsdidf(0x0100000000000010, 0x4370000000000001); + status |= test__floatunsdidf(0x0100000000000014, 0x4370000000000001); + status |= test__floatunsdidf(0x0100000000000018, 0x4370000000000002); + status |= test__floatunsdidf(0x010000000000001c, 0x4370000000000002); + status |= test__floatunsdidf(0xffffffffffffffff, 0x43f0000000000000); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatunsdisfnew_test.c b/compiler-rt/test/builtins/Unit/floatunsdisfnew_test.c new file mode 100644 index 0000000000000..f2d77da1639cf --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatunsdisfnew_test.c @@ -0,0 +1,51 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatunsdisf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from uint64_t to float +COMPILER_RT_ABI float __floatunsdisf(uint64_t a); + +int test__floatunsdisf(int line, uint64_t a, uint32_t expected_rep) { + float x = __floatunsdisf(a); + int ret = compareResultF(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatunsdisf(%016" PRIx64 ") = %08" PRIx32 + ", expected %08" PRIx32 "\n", + line, a, toRep32(x), expected_rep); + } + return ret; +} + +#define test__floatunsdisf(a,x) test__floatunsdisf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatunsdisf(0x0000000000000000, 0x00000000); + status |= test__floatunsdisf(0x0000000000000001, 0x3f800000); + status |= test__floatunsdisf(0x0000000008000000, 0x4d000000); + status |= test__floatunsdisf(0x0000000008000004, 0x4d000000); + status |= test__floatunsdisf(0x0000000008000008, 0x4d000000); + status |= test__floatunsdisf(0x000000000800000c, 0x4d000001); + status |= test__floatunsdisf(0x0000000008000010, 0x4d000001); + status |= test__floatunsdisf(0x0000000008000014, 0x4d000001); + status |= test__floatunsdisf(0x0000000008000018, 0x4d000002); + status |= test__floatunsdisf(0x000000000800001c, 0x4d000002); + status |= test__floatunsdisf(0x0000082345000000, 0x55023450); + status |= test__floatunsdisf(0x4000004000000001, 0x5e800001); + status |= test__floatunsdisf(0x8000000000000000, 0x5f000000); + status |= test__floatunsdisf(0x8000008000000000, 0x5f000000); + status |= test__floatunsdisf(0xffffffffffffffff, 0x5f800000); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatunssidfnew_test.c b/compiler-rt/test/builtins/Unit/floatunssidfnew_test.c new file mode 100644 index 0000000000000..07e1ad3928846 --- /dev/null +++ 
b/compiler-rt/test/builtins/Unit/floatunssidfnew_test.c @@ -0,0 +1,41 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatunssidf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from uint32_t to double +COMPILER_RT_ABI double __floatunssidf(uint32_t a); + +int test__floatunssidf(int line, uint32_t a, uint64_t expected_rep) { + double x = __floatunssidf(a); + int ret = compareResultD(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatunssidf(%08" PRIx32 ") = %016" PRIx64 + ", expected %016" PRIx64 "\n", + line, a, toRep64(x), expected_rep); + } + return ret; +} + +#define test__floatunssidf(a,x) test__floatunssidf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatunssidf(0x00000000, 0x0000000000000000); + status |= test__floatunssidf(0x00000001, 0x3ff0000000000000); + status |= test__floatunssidf(0x80000400, 0x41e0000080000000); + status |= test__floatunssidf(0x80000800, 0x41e0000100000000); + status |= test__floatunssidf(0xffffffff, 0x41efffffffe00000); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatunssisfnew_test.c b/compiler-rt/test/builtins/Unit/floatunssisfnew_test.c new file mode 100644 index 0000000000000..5ec0cc7426ab0 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatunssisfnew_test.c @@ -0,0 +1,55 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatunssisf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from uint32_t to float +COMPILER_RT_ABI float __floatunssisf(uint32_t a); + +int test__floatunssisf(int line, uint32_t a, uint32_t expected_rep) { + float x = __floatunssisf(a); + int ret = compareResultF(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatunssisf(%08" PRIx32 ") = %08" PRIx32 + ", expected %08" PRIx32 "\n", + line, a, toRep32(x), expected_rep); + } + return ret; +} + +#define test__floatunssisf(a,x) test__floatunssisf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatunssisf(0x00000000, 0x00000000); + status |= test__floatunssisf(0x00000001, 0x3f800000); + status |= test__floatunssisf(0x08000000, 0x4d000000); + status |= test__floatunssisf(0x08000004, 0x4d000000); + status |= test__floatunssisf(0x08000008, 0x4d000000); + status |= test__floatunssisf(0x0800000c, 0x4d000001); + status |= test__floatunssisf(0x08000010, 0x4d000001); + status |= test__floatunssisf(0x08000014, 0x4d000001); + status |= test__floatunssisf(0x08000018, 0x4d000002); + status |= test__floatunssisf(0x0800001c, 0x4d000002); + status |= test__floatunssisf(0xfffffe00, 0x4f7ffffe); + status |= test__floatunssisf(0xfffffe7f, 0x4f7ffffe); + status |= test__floatunssisf(0xfffffe80, 0x4f7ffffe); + status |= test__floatunssisf(0xfffffe81, 0x4f7fffff); + status |= test__floatunssisf(0xffffff00, 0x4f7fffff); + status |= test__floatunssisf(0xffffff7f, 0x4f7fffff); + status |= test__floatunssisf(0xffffff80, 0x4f800000); + status |= test__floatunssisf(0xffffff81, 0x4f800000); + status |= test__floatunssisf(0xffffffff, 0x4f800000); + + return status; +} _______________________________________________ llvm-branch-commits mailing list [email protected] 
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
