https://github.com/statham-arm created https://github.com/llvm/llvm-project/pull/179928
This commit adds a total of 8 new functions, all converting an integer to a floating-point number, varying in 3 independent choices: * input integer size (32-bit or 64-bit) * input integer type (signed or unsigned) * output float format (32-bit or 64-bit) The two conversions of 64-bit integer to 32-bit float live in the same source file, to save code size, since that conversion is one of the more complicated ones and the two functions can share most of their code, with only a few instructions differing at the start to handle negative numbers (or not). >From b5bb324fffbbb1725c89d87cd1845f29f8dba24d Mon Sep 17 00:00:00 2001 From: Simon Tatham <[email protected]> Date: Thu, 29 Jan 2026 16:14:34 +0000 Subject: [PATCH] [compiler-rt][ARM] Optimized integer -> FP conversions This commit adds a total of 8 new functions, all converting an integer to a floating-point number, varying in 3 independent choices: * input integer size (32-bit or 64-bit) * input integer type (signed or unsigned) * output float format (32-bit or 64-bit) The two conversions of 64-bit integer to 32-bit float live in the same source file, to save code size, since that conversion is one of the more complicated ones and the two functions can share most of their code, with only a few instructions differing at the start to handle negative numbers (or not). 
--- compiler-rt/lib/builtins/CMakeLists.txt | 8 + compiler-rt/lib/builtins/arm/floatdidf.S | 210 ++++++++++++++++++ compiler-rt/lib/builtins/arm/floatdisf.S | 200 +++++++++++++++++ compiler-rt/lib/builtins/arm/floatsidf.S | 72 ++++++ compiler-rt/lib/builtins/arm/floatsisf.S | 113 ++++++++++ compiler-rt/lib/builtins/arm/floatunsdidf.S | 180 +++++++++++++++ compiler-rt/lib/builtins/arm/floatunssidf.S | 64 ++++++ compiler-rt/lib/builtins/arm/floatunssisf.S | 103 +++++++++ .../test/builtins/Unit/floatdidfnew_test.c | 68 ++++++ .../test/builtins/Unit/floatdisfnew_test.c | 59 +++++ .../test/builtins/Unit/floatsidfnew_test.c | 43 ++++ .../test/builtins/Unit/floatsisfnew_test.c | 57 +++++ .../test/builtins/Unit/floatunsdidfnew_test.c | 57 +++++ .../test/builtins/Unit/floatunsdisfnew_test.c | 51 +++++ .../test/builtins/Unit/floatunssidfnew_test.c | 41 ++++ .../test/builtins/Unit/floatunssisfnew_test.c | 55 +++++ 16 files changed, 1381 insertions(+) create mode 100644 compiler-rt/lib/builtins/arm/floatdidf.S create mode 100644 compiler-rt/lib/builtins/arm/floatdisf.S create mode 100644 compiler-rt/lib/builtins/arm/floatsidf.S create mode 100644 compiler-rt/lib/builtins/arm/floatsisf.S create mode 100644 compiler-rt/lib/builtins/arm/floatunsdidf.S create mode 100644 compiler-rt/lib/builtins/arm/floatunssidf.S create mode 100644 compiler-rt/lib/builtins/arm/floatunssisf.S create mode 100644 compiler-rt/test/builtins/Unit/floatdidfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatdisfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatsidfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatsisfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatunsdidfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatunsdisfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatunssidfnew_test.c create mode 100644 compiler-rt/test/builtins/Unit/floatunssisfnew_test.c diff --git 
a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 5cf754135908b..ed6a9d00db069 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -465,6 +465,13 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm") arm/fixunsdfsi.S arm/fixunssfdi.S arm/fixunssfsi.S + arm/floatdidf.S + arm/floatdisf.S + arm/floatsidf.S + arm/floatsisf.S + arm/floatunsdidf.S + arm/floatunssidf.S + arm/floatunssisf.S arm/truncdfsf2.S ) set_source_files_properties(${assembly_files} @@ -478,6 +485,7 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm") arm/dnorm2.c arm/dunder.c ) + set_property(SOURCE arm/floatdisf.S DIRECTORY ${COMPILER_RT_SOURCE_DIR} PROPERTY crt_provides floatunsdisf) endif() endif() diff --git a/compiler-rt/lib/builtins/arm/floatdidf.S b/compiler-rt/lib/builtins/arm/floatdidf.S new file mode 100644 index 0000000000000..6c8875209d635 --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatdidf.S @@ -0,0 +1,210 @@ +//===-- floatdidf.S - 64-bit signed int to double-precision FP conversion -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatdidf function (64-bit signed integer to +// double precision floating point conversion), with the IEEE-754 default +// rounding (to nearest, ties to even), for the Arm and Thumb2 ISAs. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +#include "endian.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatdidf) + push {r4, lr} + bl __aeabi_l2d + VMOV_TO_DOUBLE(d0, r0, r1) + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatdidf, __aeabi_l2d) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_l2d) + + // Start by finding out the sign of the input. This instruction sets up r2 to + // be 0xFFFFFFFF for a _positive_ number, and 0 for a negative number. + mvns r2, xh, asr #31 + + // Now take the absolute value, by negating the input if it was negative + // (which we can tell from the flags set by that MVNS). + // + // Because r2 = 0 in the case where we need to negate, we can do the negation + // using SUB/SBC instead of the more obvious RSB/RSC, which saves having to + // use an extra instruction in Thumb to work around RSC not being available. + bne 0f // if positive, branch round the negation + subs xl, r2, xl + sbc xh, r2, xh +0: + + // Now we just need to convert a positive integer to double, and combine it + // with the sign in r2. + // + // There are essentially three cases we need to separate. The leading bit of + // the integer is either in xh or xl; if it's in xh, it makes a difference + // whether it's above bit 20, because that's the case where we must shift + // right and potentially round. + // + // Start by assuming the high word is nonzero; if we're wrong, we'll find out + // in a few instructions' time and be able to try again. So we find the + // position of the leading bit in xh, and turn it into a left-shift count + // that will move the leading bit up to where it belongs in the output + // double. + clz r3, xh + subs r3, r3, #11 + + // If that left-shift count is negative, we're going to have to shift the + // mantissa right instead of left, and maybe round it. 
Branch out of line for + // the code that handles that case. + blo LOCAL_LABEL(shiftdown) + + // Shift xh left to bring the top word of the mantissa to the right place. By + // making this shift set the flags, we detect if xh was zero. + // + // We branch out of line if it _wasn't_ zero, on the theory that small input + // integers are likely to occur more often than large ones, so the small case + // should be the faster path. This is a bit of a compromise between large and + // small integer performance: if we wanted to prioritise small inputs above + // all else, we could have tested if xh=0 to begin with - but that would cost + // an extra instruction on the large-integer path, because it repeats work + // that this instruction can do in passing. + lsls xh, xh, r3 + bne LOCAL_LABEL(highword) + + // Now we've found out that xh=0, we need to repeat the CLZ instruction on + // xl. The simplest thing is to shift xl up by a variable distance to put its + // leading bit at the top; then we can do immediate shifts to move it up + // further to the top of the double-precision mantissa. (Otherwise you'd have + // to make a second shift count by subtracting from 32, using more registers + // and requiring more register-controlled shifts, especially awkward in + // Thumb.) + // + // There may not _be_ a leading bit in xl at all (just as there turned out + // not to have been one in xh, if we're on this path). In that case the input + // integer was 0, and so we should return double-precision 0, which + // conveniently has the same representation (xh=xl=0 already). + clz r3, xl // decide how far to shift up + lsls xh, xl, r3 // do the shift, also checking if xl = 0 + bxeq lr // if xl = 0, return zero immediately + + // Now xl contains the output mantissa, with the leading bit at the top. We + // must shift that up another 21 bits, and recombine it with an exponent + // derived from r3 (telling us how far we've already shifted up), and the + // sign derived from r2. 
+ // + // If r3=0 then the input value was in the range [2^31,2^32), so its exponent + // in double precision should be 0x41e. We want to reduce that by 1 so that + // the leading bit of the mantissa will increment it when we add it in. So + // the exponent should be 0x41d minus r3. + // + // But also, r2 is 0 for a negative number and -1 for a positive number. So + // we _set_ the sign bit on the output, by calculating the sign+exponent + // fields together as 0xc1d minus r3; then we shift r2 left and subtract it, + // which decrements the sign bit from 1 back to 0 if the number was positive + // (hence r2 had all bits set). + rsb r3, r3, #0x1d // 0x1d minus shift count + add r3, r3, #0xc00 // 0xc1d minus shift count + sub r3, r3, r2, lsl #11 // combine with the sign + lsr r2, xh, #11 // make top word of mantissa + lsl xl, xh, #21 // make bottom word of mantissa + add xh, r2, r3, lsl #20 // combine top of mantissa with sign+exp + bx lr + +LOCAL_LABEL(highword): + // This is the branch for numbers big enough that xh != 0, but not big enough + // to need to shift downwards and round. + // + // r3 is the distance that we've already shifted xh left by. We'll need to + // shift xl left by the same amount, and we'll also need to shift xl right by + // 32 minus that, to put some of its bits at the bottom of xh. Start by + // working out the rightward shift distance, and make the full top word of + // the mantissa. + rsb r12, r3, #32 +#if !__thumb__ + orr xh, xh, xl, lsr r12 +#else + // In Thumb we have to do the register-controlled shift and the OR in + // separate instructions. + lsr r12, xl, r12 + orr xh, xh, r12 +#endif + // Shift xl left as well, so that xh:xl are now the full output mantissa, + // with its leading bit in bit 20 of xh. + lsls xl, xl, r3 + + // Calculate the sign and exponent, and recombine them with the mantissa. 
+ // This is exactly the same method as above, except that the exponent is + // different, because this time r3 stores the offset between the original + // leading bit position and bit 20 of the mantissa, so that it's zero if the + // input is in the range [2^52,2^53), which would make the output exponent + // 0x433, or 0x432 after compensating for the leading mantissa bit. + rsb r3, r3, #0x32 // 0x32 minus shift count + add r3, r3, #0xc00 // 0xc32 minus shift count + sub r3, r3, r2, lsl #11 // combine with the sign + add xh, xh, r3, lsl #20 // and with the top word of the mantissa + bx lr + +LOCAL_LABEL(shiftdown): + // This is the branch for numbers so big that the mantissa has to be shifted + // _right_, so that some of the mantissa is shifted off the bottom and the + // number has to be rounded. + // + // r3 contains the shift count, but it's currently negative (it was + // calculated as a left shift). So it's in a good state to use for + // calculating the output exponent, and therefore we do that first, while + // it's convenient. + rsb r12, r3, #0x32 // 0x32 minus shift count + add r12, r12, #0xc00 // 0xc32 minus shift count + sub r2, r12, r2, lsl #11 // combine with the sign + + // Shift the mantissa down to the right position, capturing the bits shifted + // off the bottom at the top of r3. We'll need to temporarily push a couple + // of extra registers for this part, because we need to calculate how far to + // shift xh and xl right, but also how far to shift them left to get the bits + // shifted out of each one. + push {r4,lr} + rsb r4, r3, #0 // r4 = right-shift count + rsb lr, r4, #32 // lr = left-shift count + lsl r12, xh, lr // r12 = bits shifted out of xh + lsr xh, xh, r4 // shift xh right to make its final value + lsl r3, xl, lr // r3 = bits shifted out of xl +#if !__thumb__ + orrs xl, r12, xl, lsr r4 // shift xl right and combine with r12 +#else + // In Thumb we have to do the register-controlled shift and the OR in + // separate instructions. 
+ lsrs xl, xl, r4 + orr xl, xl, r12 +#endif + pop {r4,lr} + + // Now xh:xl contains the unrounded output mantissa; r2 contains its sign and + // exponent; and r3 contains the bits shifted off the bottom. Also, the + // single flag-setting shift in the sequence above was the one that shifted + // xl right, so the carry flag contains the bit just off the bottom, i.e. the + // bit that tells us whether we need to round up. + // + // Recombine the mantissa with the sign and exponent, and then if C is clear, + // we don't need to round up, and can return. + add xh, xh, r2, lsl #20 // put back the sign+exponent + bxcc lr // return if we don't have to round + + // We're rounding up, and we may also need to round to even. + adds xl, xl, #1 // increment the mantissa to round up + adc xh, xh, #0 // and propagate a carry if any + lsls r3, r3, #1 // set Z if we had an exact halfway case + biceq xl, xl, #1 // and round back to even if so + bx lr + +END_COMPILERRT_FUNCTION(__aeabi_l2d) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/floatdisf.S b/compiler-rt/lib/builtins/arm/floatdisf.S new file mode 100644 index 0000000000000..7048c41a01f3a --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatdisf.S @@ -0,0 +1,200 @@ +//===-- floatdisf.S - 64-bit integer to single-precision FP conversion ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatdisf and __floatunsdisf functions (64-bit +// signed and unsigned integer to single precision floating point conversion), +// with the IEEE-754 default rounding (to nearest, ties to even), for the Arm +// and Thumb2 ISAs. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +#include "endian.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatdisf) + push {r4, lr} + bl __aeabi_l2f + vmov s0, r0 + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatdisf, __aeabi_l2f) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_l2f) + + // Isolate the input integer's sign bit in r2, and if the input is negative, + // negate it. + ands r2, xh, #0x80000000 + bpl 0f // if positive, skip the negation + rsbs xl, xl, #0 +#if !__thumb__ + rsc xh, xh, #0 +#else + // Thumb has no RSC, so simulate it by bitwise inversion and then ADC + mvn xh, xh + adc xh, xh, #0 +#endif +0: + + // Combine the sign in r2 with the FP exponent of 1. So r2 now holds the + // single-precision encoding of +1 or -1 as appropriate. + orr r2, r2, #0x3f800000 + + // Now we have a positive 64-bit integer in xh,xl, and a starting sign + + // exponent in r2. + // + // We also come here from the unsigned-integer conversion function below, so + // we must handle xh,xl having any possible values at all, even 2^63 or + // greater. +LOCAL_LABEL(normalise): + + // Add 30 to the exponent in r2, so that it holds +2^30 or -2^30. The idea is + // that after we normalise the input integer into a FP mantissa with the + // topmost 1 in bit 23, adding that will increment by one more, so that this + // exponent will be correct if the input has its high bit in bit 31. We'll + // decrease the exponent if CLZ returns a positive value, and increment it by + // 32 if the high word is used. + // + // You might ask why we didn't set up r2 to have this value in the first + // place, by ORRing the sign bit with 0x4e800000 instead of 0x3f800000. The + // answer is because 0x4e800000 can't be represented in the immediate field + // of an AArch32 data-processing instruction, so we can't avoid using two + // instructions. 
+ add r2, r2, #30 << 23 + + // Start setting up r3 to be the exponent adjustment, and set xh to be the + // highest _nonzero_ word of the input. If xh = 0, set r3 = 0 and copy xl + // (the only nonzero input word) into xh; if xh != 0, set r3 = 32. + movs r3, xh // sets r3=0 if xh=0, testing at the same time + movne r3, #32 // if that didn't happen, set r3=32 + movseq xh, xl // and otherwise, copy xl into xh + + // Using a MOVS for the final copy has the side effect that we've also just + // tested whether xh = xl = 0. If so, then the entire input value was zero, + // so we should return 0. Conveniently, that's the value in both xl and xh + // right now, so no matter which of those is r0 (which varies with + // endianness) we can just return. + bxeq lr + + // Now we know xh contains the highest set bit of the input. Find that bit, + // shift it up to the top of the word, and adjust the shift count + // appropriately. + // + // After this, r3 contains the full exponent adjustment we'll need to add to + // the starting exponent in r2: it takes values from -31 (if the input was 1) + // to +32 (if the input was 2^63 or bigger). + clz r12, xh + mov xh, xh, lsl r12 + sub r3, r3, r12 + + // If the input integer is < 2^32, then we've now set up xh to be the full + // output mantissa (with its leading bit at the top of the word). If not, + // then we still need to add some bits from xl. + // + // We don't need to spend an instruction on deciding which: it's enough to + // just shift xl right by whatever is in r3. In the case where we don't want + // it (because the bits in xl are already in the output mantissa), r3 <= 0. + // If r3 = 0 (the input was an exactly 32-bit integer) then the bits in xl + // will exactly overlay the ones already in xh and make no difference; if r3 + // < 0 then the AArch32 shift instruction semantics will treat it as a shift + // of more than 32 bits, shifting xl right off the bottom of the word, and + // again not modify xh. 
+#if !__thumb__ + orr xh, xh, xl, lsr r3 // if shift negative then xh unaltered +#else + // Thumb can't fold a register-controlled shift into an ORR, so we must use + // two separate instructions. + lsr r12, xl, r3 + orr xh, xh, r12 +#endif + + // Combine the exponent adjustment in r3 with the starting exponent and sign + // in r2. These parts of the output are now ready to combine with the + // mantissa, once we've shifted it down and rounded it. + add r2, r2, r3, lsl #23 + + // Now we must round. The mantissa in xh contains the top 32 bits of the + // full result, including the bit we're going to shift just off the bottom + // (which controls the basic 'round up or down?' question). So we can start + // by checking those, which will handle most cases. + + // This shift moves the round bit off the top of xh into the carry flag, so + // that C is set if we're rounding up. It also sets Z if all the bits below + // that are zero, which _might_ mean we need to round to even, but only if + // the further bits in xl are also zero. But if Z is _not_ set then we can + // return without checking xl. + lsls r12, xh, #25 + +#ifndef __BIG_ENDIAN__ + // We're about to overwrite r0 with the preliminary output. This will be our + // last use of xh, but we still need xl later. So in little-endian mode, + // where xl _is_ r0, we must spend an extra instruction on saving it. + mov r12, xl +#endif + + // Recombine the mantissa (shifted down to the right position) with the sign + // and exponent in r2. Using ADC also rounds up if C is set. + adc r0, r2, xh, lsr #8 + + // If C was clear, we didn't round up, so we don't need to undo that by + // rounding to even. And if Z was clear, we're not rounding to even anyway. + // So in either case, we're done. + bxcc lr + bxne lr + + // The slow path: nothing in the top 32 bits of the mantissa ruled out having + // to round to even. Now we must check the rest of the mantissa bits in xl. 
+ // + // This RSB instruction converts the previous exponent adjustment value (-31 + // for smallest integer, +32 for largest) into a value from 0 (_largest_ + // integer) to 63 (smallest). So if the integer occupied n bits of xh, then + // 32-n bits of xl ended up in the initial mantissa word, so shifting xl left + // by 32-n will catch precisely the bits of xl that didn't. And if the + // integer was entirely in xl, then this shift count will be >=32, so the + // left shift will throw away all of xl. + rsb r3, r3, #32 + + // Shift xl to include just the shifted-off bits, setting Z if they're all + // zero. Then we know whether to round to even by clearing bit 0 of the + // output. +#ifdef __BIG_ENDIAN__ + lsls r12, xl, r3 // the low word is still in xl itself +#else + lsls r12, r12, r3 // we moved it into r12 earlier +#endif + biceq r0, r0, #1 + + // And whether we did that or not, we're finished. + bx lr + +END_COMPILERRT_FUNCTION(__aeabi_l2f) + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatunsdisf) + push {r4, lr} + bl __aeabi_ul2f + vmov s0, r0 + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatunsdisf, __aeabi_ul2f) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_ul2f) + // Jump to LOCAL_LABEL(normalise) above, without negating the input, and + // having set up r2 unconditionally to indicate that a positive output is + // wanted. + mov r2, #0x3f800000 + b LOCAL_LABEL(normalise) + +END_COMPILERRT_FUNCTION(__aeabi_ul2f) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/floatsidf.S b/compiler-rt/lib/builtins/arm/floatsidf.S new file mode 100644 index 0000000000000..22d9c572fff44 --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatsidf.S @@ -0,0 +1,72 @@ +//===-- floatsidf.S - 32-bit signed int to double-precision FP conversion -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatsidf function (32-bit signed integer to +// double precision floating point conversion), with the IEEE-754 default +// rounding (to nearest, ties to even), for the Arm and Thumb2 ISAs. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +#include "endian.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatsidf) + push {r4, lr} + bl __aeabi_i2d + VMOV_TO_DOUBLE(d0, r0, r1) + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatsidf, __aeabi_i2d) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_i2d) + + // Start by separating the input into its sign and its absolute value. + ands r2, r0, #0x80000000 // get sign + rsbmi r0, r0, #0 // negate input if it was negative + + // Shift the leading bit of the input to the top of xh. In the + // process, we detect a zero input, and branch out of line for it. + clz r3, r0 + lsls xh, r0, r3 + beq LOCAL_LABEL(zero) + + // Use the shift count in r3 to calculate the output exponent, and combine it + // with the sign bit in r2. + // + // If r3=0 then the input value was in the range [2^31,2^32), so its exponent + // in double precision should be 0x41e. We want to reduce that by 1 so that + // the leading bit of the mantissa will increment it when we add it in. So + // the exponent should be 0x41d minus r3. + // + // (For a signed integer, the only possible value in that range would be + // 2^31, obtained as the absolute value of an original input of -2^31.) 
+ rsb r3, r3, #0x1d // 0x1d minus shift count + add r3, r3, #0x400 // 0x41d minus shift count + add r2, r2, r3, lsl #20 // r2 is now sign+exponent + + // Shift the mantissa left and right to get the parts that go in xl and xh, + // and combine with the sign and exponent we just computed. + lsl xl, xh, #21 // low bits of mantissa + add xh, r2, xh, lsr #11 // high bits of mantissa + sign + exponent + bx lr + +LOCAL_LABEL(zero): + // We come here if the input was zero. We've just set xh to 0, so we + // only need to set xl to 0 too and return. + mov xl, #0 + bx lr + +END_COMPILERRT_FUNCTION(__aeabi_i2d) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/floatsisf.S b/compiler-rt/lib/builtins/arm/floatsisf.S new file mode 100644 index 0000000000000..e7d474f81572b --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatsisf.S @@ -0,0 +1,113 @@ +//===-- floatsisf.S - 32-bit signed int to single-precision FP conversion -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatsisf function (32-bit signed integer to +// single precision floating point conversion), with the IEEE-754 default +// rounding (to nearest, ties to even), for the Arm and Thumb2 ISAs. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatsisf) + push {r4, lr} + bl __aeabi_i2f + vmov s0, r0 + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatsisf, __aeabi_i2f) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_i2f) + + // Isolate the input integer's sign bit in r1, and if the input is negative, + // negate it. 
+ ands r1, r0, #0x80000000 + rsbmi r0, r0, #0 + + // Find the highest set bit of the input, and shift it up to the top bit. r3 + // contains the amount we shifted by, and r2 the shifted value. + clz r3, r0 + lsls r2, r0, r3 + + // Convert the shift distance into the exponent of the output float. The + // exponent for an integer with bit 31 set should be 0x7f (the IEEE exponent + // bias) plus 31, which is 0x9e. Here we reduce that by 1, because when we + // add the mantissa to it, the leading mantissa bit will increment it. + rsb r3, r3, #0x9d + + // A side effect of the LSLS above was to set the Z flag if the input integer + // was actually zero. In that situation, we can just return immediately, + // because r0 still _contains_ the input integer, which has the same + // representation as the floating-point +0 that we should return. +#if !__thumb__ + bxeq lr +#else + // In Thumb, we do the conditional return by branching to a return + // instruction later in the function. This costs more time in the case where + // the return is taken, but saves an IT in the case where it's not, and we + // assume that nonzero integers are converted to FP more often than zero is. + // (This also improves the _worst-case_ running time, because the nonzero + // code path is the limiting factor.) + beq LOCAL_LABEL(return) +#endif + + // Shift the exponent up to its final bit position, and combine it with the + // sign. + orrs r1, r1, r3, lsl #23 + + // Recombine the mantissa with the sign+exponent, and round. This is done + // differently between Arm and Thumb. +#if !__thumb__ + // Arm rounding sequence: shift the round bit off the top of r2 into C, and + // simultaneously set Z if the lower-down bits are all zero. + lsls r3, r2, #25 + // Recombine mantissa with sign+exponent, using ADC so that this also adds 1 + // if we're rounding up. + adc r0, r1, r2, lsr #8 + // If C was clear, we can't possibly need to round to even, so return. 
+ bxcc lr + // Now we've definitely rounded up, and if Z is set, round to even. + biceq r0, r0, #1 + bx lr +#else + // Thumb rounding sequence: we do things in a slightly different order, by + // recombining first with plain ADD, and _then_ testing the round bit. On + // simple M-profile CPUs like Cortex-M3, this avoids the IT instruction + // (inserted before BXCC lr) costing a cycle, because it immediately follows + // a 16-bit LSLS instruction, so the CPU had already fetched it. + // + // So we save a cycle in the case where we don't round up, at the cost of a + // cycle in the case where we do (requiring a separate ADD instruction after + // the BXCC lr isn't taken). We expect that this is a good trade, on the + // theory that _most_ integers converted into floating point are not large + // enough to need rounding at all, so all the exact cases _and_ half the + // inexact ones will benefit from the saving. + add r0, r1, r2, lsr #8 // r0 is now sign+exp+mant, unrounded +#if __thumb__ + lsls.N r3, r2, #25 // .N to make sure it's assembled as 16-bit +#else + lsls r3, r2, #25 // GNU assembler treats .N as an error in Arm +#endif + bxcc lr + // Now if we didn't take the return, we must definitely round up, and + // conditionally round to even. + add r0, r0, #1 + biceq r0, r0, #1 +LOCAL_LABEL(return): // label we branch to from the 0 case above + bx lr +#endif + +END_COMPILERRT_FUNCTION(__aeabi_i2f) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/floatunsdidf.S b/compiler-rt/lib/builtins/arm/floatunsdidf.S new file mode 100644 index 0000000000000..8c368df37169c --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatunsdidf.S @@ -0,0 +1,180 @@ +//===-- floatunsdidf.S - 64-bit unsigned int to double-precision FP conversion// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatunsdidf function (64-bit unsigned integer to +// double precision floating point conversion), with the IEEE-754 default +// rounding (to nearest, ties to even), for the Arm and Thumb2 ISAs. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +#include "endian.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatunsdidf) + push {r4, lr} + bl __aeabi_ul2d + VMOV_TO_DOUBLE(d0, r0, r1) + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatunsdidf, __aeabi_ul2d) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_ul2d) + + // There are essentially three cases we need to separate. The leading bit of + // the integer is either in xh or xl; if it's in xh, it makes a difference + // whether it's above bit 20, because that's the case where we must shift + // right and potentially round. + // + // Start by assuming the high word is nonzero; if we're wrong, we'll find out + // in a few instructions' time and be able to try again. So we find the + // position of the leading bit in xh, and turn it into a left-shift count + // that will move the leading bit up to where it belongs in the output + // double. + clz r3, xh + subs r3, r3, #11 + + // If that left-shift count is negative, we're going to have to shift the + // mantissa right instead of left, and maybe round it. Branch out of line for + // the code that handles that case. + blo LOCAL_LABEL(shiftdown) + + // Shift xh left to bring the top word of the mantissa to the right place. By + // making this shift set the flags, we detect if xh was zero. 
+ // + // We branch out of line if it _wasn't_ zero, on the theory that small input + // integers are likely to occur more often than large ones, so the small case + // should be the faster path. This is a bit of a compromise between large and + // small integer performance: if we wanted to prioritise small inputs above + // all else, we could have tested if xh=0 to begin with - but that would cost + // an extra instruction on the large-integer path, because it repeats work + // that this instruction can do in passing. + lsls xh, xh, r3 + bne LOCAL_LABEL(highword) + + // Now we've found out that xh=0, we need to repeat the CLZ instruction on + // xl. The simplest thing is to shift xl up by a variable distance to put its + // leading bit at the top; then we can do immediate shifts to move it up + // further to the top of the double-precision mantissa. (Otherwise you'd have + // to make a second shift count by subtracting from 32, using more registers + // and requiring more register-controlled shifts, especially awkward in + // Thumb.) + // + // There may not _be_ a leading bit in xl at all (just as there turned out + // not to have been one in xh, if we're on this path). In that case the input + // integer was 0, and so we should return double-precision 0, which + // conveniently has the same representation (xh=xl=0 already). + clz r3, xl // decide how far to shift up + lsls xh, xl, r3 // do the shift, also checking if xl = 0 + bxeq lr // if xl = 0, return zero immediately + + // Now xl contains the output mantissa, with the leading bit at the top. We + // must shift that up another 21 bits, and recombine it with an exponent + // derived from r3 (telling us how far we've already shifted up). + // + // If r3=0 then the input value was in the range [2^31,2^32), so its exponent + // in double precision should be 0x41e. We want to reduce that by 1 so that + // the leading bit of the mantissa will increment it when we add it in. 
So + // the exponent should be 0x41d minus r3. + rsb r3, r3, #0x1d // 0x1d minus shift count + add r3, r3, #0x400 // 0x41d minus shift count + lsr r2, xh, #11 // make top word of mantissa + lsl xl, xh, #21 // make bottom word of mantissa + add xh, r2, r3, lsl #20 // and combine it with exponent + bx lr + +LOCAL_LABEL(highword): + // This is the branch for numbers big enough that xh != 0, but not big enough + // to need to shift downwards and round. + // + // r3 is the distance that we've already shifted xh left by. We'll need to + // shift xl left by the same amount, and we'll also need to shift xl right by + // 32 minus that, to put some of its bits at the bottom of xh. + rsb r12, r3, #32 +#if !__thumb__ + orr xh, xh, xl, lsr r12 +#else + // In Thumb we have to do the register-controlled shift and the OR in + // separate instructions. + lsr r12, xl, r12 + orr xh, xh, r12 +#endif + // Shift xl left as well, so that xh:xl are now the full output mantissa, + // with its leading bit in bit 20 of xh. + lsls xl, xl, r3 + + // Calculate the exponent, and recombine it with the mantissa. This is + // exactly the same method as above, except that the exponent is different, + // because this time r3 stores the offset between the original leading bit + // position and bit 20 of the mantissa, so that it's zero if the input is in + // the range [2^52,2^53), which would make the output exponent 0x433, or + // 0x432 after compensating for the leading mantissa bit. + rsb r3, r3, #0x32 // 0x32 minus shift count + add r3, r3, #0x400 // 0x432 minus shift count + add xh, xh, r3, lsl #20 // combine with the top word of the mantissa + bx lr + +LOCAL_LABEL(shiftdown): + // This is the branch for numbers so big that the mantissa has to be shifted + // _right_, so that some of the mantissa is shifted off the bottom and the + // number has to be rounded. + // + // r3 contains the shift count, but it's currently negative (it was + // calculated as a left shift). 
So it's in a good state to use for + // calculating the output exponent, and therefore we do that first, while + // it's convenient. + rsb r2, r3, #0x32 // 0x32 minus shift count + add r2, r2, #0x400 // 0x432 minus shift count + + // Shift the mantissa down to the right position, capturing the bits shifted + // off the bottom at the top of r3. We'll need to temporarily push a couple + // of extra registers for this part, because we need to calculate how far to + // shift xh and xl right, but also how far to shift them left to get the bits + // shifted out of each one. + push {r4,lr} + rsb r4, r3, #0 // r4 = right-shift count + rsb lr, r4, #32 // lr = left-shift count + lsl r12, xh, lr // r12 = bits shifted out of xh + lsr xh, xh, r4 // shift xh right to make its final value + lsl r3, xl, lr // r3 = bits shifted out of xl +#if !__thumb__ + orrs xl, r12, xl, lsr r4 // shift xl right and combine with r12 +#else + // In Thumb we have to do the register-controlled shift and the OR in + // separate instructions. + lsrs xl, xl, r4 + orr xl, xl, r12 +#endif + pop {r4,lr} + + // Now xh:xl contains the unrounded output mantissa; r2 contains its + // exponent; and r3 contains the bits shifted off the bottom. Also, the + // single flag-setting shift in the sequence above was the one that shifted + // xl right, so the carry flag contains the bit just off the bottom, i.e. the + // bit that tells us whether we need to round up. + // + // Recombine the mantissa with the exponent, and then if C is clear, we don't + // need to round up, and can return. + add xh, xh, r2, lsl #20 // put back the exponent + bxcc lr // return if we don't have to round + + // We're rounding up, and we may also need to round to even. 
+ adds xl, xl, #1 // increment the mantissa to round up + adc xh, xh, #0 // and propagate a carry if any + lsls r3, r3, #1 // set Z if we had an exact halfway case + biceq xl, xl, #1 // and round back to even if so + bx lr + +END_COMPILERRT_FUNCTION(__aeabi_ul2d) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/floatunssidf.S b/compiler-rt/lib/builtins/arm/floatunssidf.S new file mode 100644 index 0000000000000..33976ff357832 --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatunssidf.S @@ -0,0 +1,64 @@ +//===-- floatunssidf.S - 32-bit unsigned int to double-precision FP conversion// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatunssidf function (32-bit unsigned integer to +// double precision floating point conversion), with the IEEE-754 default +// rounding (to nearest, ties to even), for the Arm and Thumb2 ISAs. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +#include "endian.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatunssidf) + push {r4, lr} + bl __aeabi_ui2d + VMOV_TO_DOUBLE(d0, r0, r1) + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatunssidf, __aeabi_ui2d) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_ui2d) + + // Shift the leading bit of the input to the top of xh. In the + // process, we detect a zero input, and branch out of line for it. + clz r3, r0 + lsls xh, r0, r3 + beq LOCAL_LABEL(zero) + + // Use the shift count in r3 to calculate the output exponent. + // + // If r3=0 then the input value was in the range [2^31,2^32), so its exponent + // in double precision should be 0x41e. 
We want to reduce that by 1 so that + // the leading bit of the mantissa will increment it when we add it in. So + // the exponent should be 0x41d minus r3. + rsb r3, r3, #0x1d // 0x1d minus shift count + add r3, r3, #0x400 // 0x41d minus shift count + lsl r2, r3, #20 // r2 is now shifted exponent + + // Shift the mantissa left and right to get the parts that go in xl and xh, + // and combine with the exponent we just computed. + lsl xl, xh, #21 // low bits of mantissa + add xh, r2, xh, lsr #11 // high bits of mantissa + exponent + bx lr + +LOCAL_LABEL(zero): + // We come here if the input was zero. We've just set xh to 0, so we + // only need to set xl to 0 too and return. + mov xl, #0 + bx lr + +END_COMPILERRT_FUNCTION(__aeabi_ui2d) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/floatunssisf.S b/compiler-rt/lib/builtins/arm/floatunssisf.S new file mode 100644 index 0000000000000..6f6f3281adf16 --- /dev/null +++ b/compiler-rt/lib/builtins/arm/floatunssisf.S @@ -0,0 +1,103 @@ +//===-- floatunssisf.S - 32-bit unsigned int to single-precision FP conversion// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __floatunssisf function (32-bit unsigned integer to +// single precision floating point conversion), with the IEEE-754 default +// rounding (to nearest, ties to even), for the Arm and Thumb2 ISAs. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__floatunssisf) + push {r4, lr} + bl __aeabi_ui2f + vmov s0, r0 + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__floatunssisf, __aeabi_ui2f) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_ui2f) + + // Find the highest set bit of the input, and shift it up to the top bit. r3 + // contains the amount we shifted by, and r2 the shifted value. + clz r3, r0 + lsls r2, r0, r3 + + // Convert the shift distance into the exponent of the output float. The + // exponent for an integer with bit 31 set should be 0x7f (the IEEE exponent + // bias) plus 31, which is 0x9e. Here we reduce that by 1, because when we + // add the mantissa to it, the leading mantissa bit will increment it. + rsb r3, r3, #0x9d + + // A side effect of the LSLS above was to set the Z flag if the input integer + // was actually zero. In that situation, we can just return immediately, + // because r0 still _contains_ the input integer, which has the same + // representation as the floating-point +0 that we should return. +#if !__thumb__ + bxeq lr +#else + // In Thumb, we do the conditional return by branching to a return + // instruction later in the function. This costs more time in the case where + // the return is taken, but saves an IT in the case where it's not, and we + // assume that nonzero integers are converted to FP more often than zero is. + // (This also improves the _worst-case_ running time, because the nonzero + // code path is the limiting factor.) + beq LOCAL_LABEL(return) +#endif + + // Shift the exponent up to its final bit position. + lsls r1, r3, #23 + + // Recombine the mantissa with the exponent, and round. This is done + // differently between Arm and Thumb. 
+#if !__thumb__ + // Arm rounding sequence: shift the round bit off the top of r2 into C, and + // simultaneously set Z if the lower-down bits are all zero. + lsls r3, r2, #25 + // Recombine mantissa with exponent, using ADC so that this also adds 1 if + // we're rounding up. + adc r0, r1, r2, lsr #8 + // If C was clear, we can't possibly need to round to even, so return. + bxcc lr + // Now we've definitely rounded up, and if Z is set, round to even. + biceq r0, r0, #1 + bx lr +#else + // Thumb rounding sequence: we do things in a slightly different order, by + // recombining first with plain ADD, and _then_ testing the round bit. On + // simple M-profile CPUs like Cortex-M3, this avoids the IT instruction + // (inserted before BXCC lr) costing a cycle, because it immediately follows + // a 16-bit LSLS instruction, so the CPU had already fetched it. + // + // So we save a cycle in the case where we don't round up, at the cost of a + // cycle in the case where we do (requiring a separate ADD instruction after + // the BXCC lr isn't taken). We expect that this is a good trade, on the + // theory that _most_ integers converted into floating point are not large + // enough to need rounding at all, so all the exact cases _and_ half the + // inexact ones will benefit from the saving. + add r0, r1, r2, lsr #8 // r0 is now exp+mant, unrounded + lsls.N r3, r2, #25 // .N to make sure it's assembled as 16-bit + bxcc lr + // Now if we didn't take the return, we must definitely round up, and + // conditionally round to even. 
+ add r0, r0, #1 + biceq r0, r0, #1 +LOCAL_LABEL(return): // label we branch to from the 0 case above + bx lr +#endif + +END_COMPILERRT_FUNCTION(__aeabi_ui2f) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/test/builtins/Unit/floatdidfnew_test.c b/compiler-rt/test/builtins/Unit/floatdidfnew_test.c new file mode 100644 index 0000000000000..601be3f573263 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatdidfnew_test.c @@ -0,0 +1,68 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatdidf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from int64_t to double +COMPILER_RT_ABI double __floatdidf(int64_t a); + +int test__floatdidf(int line, uint64_t a, uint64_t expected_rep) { + double x = __floatdidf(a); + int ret = compareResultD(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatdidf(%016" PRIx64 ") = %016" PRIx64 + ", expected %016" PRIx64 "\n", + line, a, toRep64(x), expected_rep); + } + return ret; +} + +#define test__floatdidf(a,x) test__floatdidf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatdidf(0x0000000000000000, 0x0000000000000000); + status |= test__floatdidf(0x0000000000000001, 0x3ff0000000000000); + status |= test__floatdidf(0x0000000000000001, 0x3ff0000000000000); + status |= test__floatdidf(0x0000000080000000, 0x41e0000000000000); + status |= test__floatdidf(0x0000000080000001, 0x41e0000000200000); + status |= test__floatdidf(0x0000000080000003, 0x41e0000000600000); + status |= test__floatdidf(0x0000000080000007, 0x41e0000000e00000); + status |= test__floatdidf(0x00000000fffffff8, 0x41efffffff000000); + status |= test__floatdidf(0x00000000fffffffc, 0x41efffffff800000); + status |= 
test__floatdidf(0x00000000fffffffe, 0x41efffffffc00000); + status |= test__floatdidf(0x00000000ffffffff, 0x41efffffffe00000); + status |= test__floatdidf(0x0000082345670000, 0x42a0468ace000000); + status |= test__floatdidf(0x0100000000000000, 0x4370000000000000); + status |= test__floatdidf(0x0100000000000004, 0x4370000000000000); + status |= test__floatdidf(0x0100000000000008, 0x4370000000000000); + status |= test__floatdidf(0x010000000000000c, 0x4370000000000001); + status |= test__floatdidf(0x0100000000000010, 0x4370000000000001); + status |= test__floatdidf(0x0100000000000014, 0x4370000000000001); + status |= test__floatdidf(0x0100000000000018, 0x4370000000000002); + status |= test__floatdidf(0x010000000000001c, 0x4370000000000002); + status |= test__floatdidf(0x7fffffffffffffff, 0x43e0000000000000); + status |= test__floatdidf(0x8000000000000000, 0xc3e0000000000000); + status |= test__floatdidf(0x8000000000000001, 0xc3e0000000000000); + status |= test__floatdidf(0xfeffffffffffffe4, 0xc370000000000002); + status |= test__floatdidf(0xfeffffffffffffe8, 0xc370000000000002); + status |= test__floatdidf(0xfeffffffffffffec, 0xc370000000000001); + status |= test__floatdidf(0xfefffffffffffff0, 0xc370000000000001); + status |= test__floatdidf(0xfefffffffffffff4, 0xc370000000000001); + status |= test__floatdidf(0xfefffffffffffff8, 0xc370000000000000); + status |= test__floatdidf(0xfefffffffffffffc, 0xc370000000000000); + status |= test__floatdidf(0xff00000000000000, 0xc370000000000000); + status |= test__floatdidf(0xffe9ef445b91437b, 0xc33610bba46ebc85); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatdisfnew_test.c b/compiler-rt/test/builtins/Unit/floatdisfnew_test.c new file mode 100644 index 0000000000000..66a7193763b66 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatdisfnew_test.c @@ -0,0 +1,59 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatdisf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from int64_t to float +COMPILER_RT_ABI float __floatdisf(int64_t a); + +int test__floatdisf(int line, uint64_t a, uint32_t expected_rep) { + float x = __floatdisf(a); + int ret = compareResultF(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatdisf(%016" PRIx64 ") = %08" PRIx32 + ", expected %08" PRIx32 "\n", + line, a, toRep32(x), expected_rep); + } + return ret; +} + +#define test__floatdisf(a,x) test__floatdisf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatdisf(0x0000000000000000, 0x00000000); + status |= test__floatdisf(0x0000000000000001, 0x3f800000); + status |= test__floatdisf(0x0000000008000000, 0x4d000000); + status |= test__floatdisf(0x0000000008000004, 0x4d000000); + status |= test__floatdisf(0x0000000008000008, 0x4d000000); + status |= test__floatdisf(0x000000000800000c, 0x4d000001); + status |= test__floatdisf(0x0000000008000010, 0x4d000001); + status |= test__floatdisf(0x0000000008000014, 0x4d000001); + status |= test__floatdisf(0x0000000008000018, 0x4d000002); + status |= test__floatdisf(0x000000000800001c, 0x4d000002); + status |= test__floatdisf(0x0000082345000000, 0x55023450); + status |= test__floatdisf(0x4000004000000001, 0x5e800001); + status |= test__floatdisf(0x7fffffffffffffff, 0x5f000000); + status |= test__floatdisf(0x8000000000000000, 0xdf000000); + status |= test__floatdisf(0x8000000000000001, 0xdf000000); + status |= test__floatdisf(0xfffffffff7ffffe4, 0xcd000002); + status |= test__floatdisf(0xfffffffff7ffffe8, 0xcd000002); + status |= test__floatdisf(0xfffffffff7ffffec, 0xcd000001); + status |= test__floatdisf(0xfffffffff7fffff0, 0xcd000001); + status |= 
test__floatdisf(0xfffffffff7fffff4, 0xcd000001); + status |= test__floatdisf(0xfffffffff7fffff8, 0xcd000000); + status |= test__floatdisf(0xfffffffff7fffffc, 0xcd000000); + status |= test__floatdisf(0xfffffffff8000000, 0xcd000000); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatsidfnew_test.c b/compiler-rt/test/builtins/Unit/floatsidfnew_test.c new file mode 100644 index 0000000000000..55ccca3c8d2d1 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatsidfnew_test.c @@ -0,0 +1,43 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatsidf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from int32_t to double +COMPILER_RT_ABI double __floatsidf(int32_t a); + +int test__floatsidf(int line, uint32_t a, uint64_t expected_rep) { + double x = __floatsidf(a); + int ret = compareResultD(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatsidf(%08" PRIx32 ") = %016" PRIx64 + ", expected %016" PRIx64 "\n", + line, a, toRep64(x), expected_rep); + } + return ret; +} + +#define test__floatsidf(a,x) test__floatsidf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatsidf(0x00000000, 0x0000000000000000); + status |= test__floatsidf(0x00000001, 0x3ff0000000000000); + status |= test__floatsidf(0x40000200, 0x41d0000080000000); + status |= test__floatsidf(0x40000400, 0x41d0000100000000); + status |= test__floatsidf(0x7fffffff, 0x41dfffffffc00000); + status |= test__floatsidf(0x80000000, 0xc1e0000000000000); + status |= test__floatsidf(0x80000001, 0xc1dfffffffc00000); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatsisfnew_test.c b/compiler-rt/test/builtins/Unit/floatsisfnew_test.c new file mode 
100644 index 0000000000000..ef718f15f0214 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatsisfnew_test.c @@ -0,0 +1,57 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatsisf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from int32_t to float +COMPILER_RT_ABI float __floatsisf(int32_t a); + +int test__floatsisf(int line, uint32_t a, uint32_t expected_rep) { + float x = __floatsisf(a); + int ret = compareResultF(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatsisf(%08" PRIx32 ") = %08" PRIx32 + ", expected %08" PRIx32 "\n", + line, a, toRep32(x), expected_rep); + } + return ret; +} + +#define test__floatsisf(a,x) test__floatsisf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatsisf(0x00000000, 0x00000000); + status |= test__floatsisf(0x00000001, 0x3f800000); + status |= test__floatsisf(0x08000000, 0x4d000000); + status |= test__floatsisf(0x08000004, 0x4d000000); + status |= test__floatsisf(0x08000008, 0x4d000000); + status |= test__floatsisf(0x0800000c, 0x4d000001); + status |= test__floatsisf(0x08000010, 0x4d000001); + status |= test__floatsisf(0x08000014, 0x4d000001); + status |= test__floatsisf(0x08000018, 0x4d000002); + status |= test__floatsisf(0x0800001c, 0x4d000002); + status |= test__floatsisf(0x7fffffff, 0x4f000000); + status |= test__floatsisf(0x80000000, 0xcf000000); + status |= test__floatsisf(0x80000001, 0xcf000000); + status |= test__floatsisf(0xf7ffffe4, 0xcd000002); + status |= test__floatsisf(0xf7ffffe8, 0xcd000002); + status |= test__floatsisf(0xf7ffffec, 0xcd000001); + status |= test__floatsisf(0xf7fffff0, 0xcd000001); + status |= test__floatsisf(0xf7fffff4, 0xcd000001); + status |= 
test__floatsisf(0xf7fffff8, 0xcd000000); + status |= test__floatsisf(0xf7fffffc, 0xcd000000); + status |= test__floatsisf(0xf8000000, 0xcd000000); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatunsdidfnew_test.c b/compiler-rt/test/builtins/Unit/floatunsdidfnew_test.c new file mode 100644 index 0000000000000..7199e02580738 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatunsdidfnew_test.c @@ -0,0 +1,57 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatunsdidf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from uint64_t to double +COMPILER_RT_ABI double __floatunsdidf(uint64_t a); + +int test__floatunsdidf(int line, uint64_t a, uint64_t expected_rep) { + double x = __floatunsdidf(a); + int ret = compareResultD(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatunsdidf(%016" PRIx64 ") = %016" PRIx64 + ", expected %016" PRIx64 "\n", + line, a, toRep64(x), expected_rep); + } + return ret; +} + +#define test__floatunsdidf(a,x) test__floatunsdidf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatunsdidf(0x0000000000000000, 0x0000000000000000); + status |= test__floatunsdidf(0x0000000000000001, 0x3ff0000000000000); + status |= test__floatunsdidf(0x0000000080000000, 0x41e0000000000000); + status |= test__floatunsdidf(0x0000000080000001, 0x41e0000000200000); + status |= test__floatunsdidf(0x0000000080000003, 0x41e0000000600000); + status |= test__floatunsdidf(0x0000000080000007, 0x41e0000000e00000); + status |= test__floatunsdidf(0x00000000fffffff8, 0x41efffffff000000); + status |= test__floatunsdidf(0x00000000fffffffc, 0x41efffffff800000); + status |= test__floatunsdidf(0x00000000fffffffe, 
0x41efffffffc00000); + status |= test__floatunsdidf(0x00000000ffffffff, 0x41efffffffe00000); + status |= test__floatunsdidf(0x0000082345670000, 0x42a0468ace000000); + status |= test__floatunsdidf(0x0013d0942cab7317, 0x4333d0942cab7317); + status |= test__floatunsdidf(0x0100000000000000, 0x4370000000000000); + status |= test__floatunsdidf(0x0100000000000004, 0x4370000000000000); + status |= test__floatunsdidf(0x0100000000000008, 0x4370000000000000); + status |= test__floatunsdidf(0x010000000000000c, 0x4370000000000001); + status |= test__floatunsdidf(0x0100000000000010, 0x4370000000000001); + status |= test__floatunsdidf(0x0100000000000014, 0x4370000000000001); + status |= test__floatunsdidf(0x0100000000000018, 0x4370000000000002); + status |= test__floatunsdidf(0x010000000000001c, 0x4370000000000002); + status |= test__floatunsdidf(0xffffffffffffffff, 0x43f0000000000000); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatunsdisfnew_test.c b/compiler-rt/test/builtins/Unit/floatunsdisfnew_test.c new file mode 100644 index 0000000000000..f2d77da1639cf --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatunsdisfnew_test.c @@ -0,0 +1,51 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatunsdisf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from uint64_t to float +COMPILER_RT_ABI float __floatunsdisf(uint64_t a); + +int test__floatunsdisf(int line, uint64_t a, uint32_t expected_rep) { + float x = __floatunsdisf(a); + int ret = compareResultF(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatunsdisf(%016" PRIx64 ") = %08" PRIx32 + ", expected %08" PRIx32 "\n", + line, a, toRep32(x), expected_rep); + } + return ret; +} + +#define test__floatunsdisf(a,x) test__floatunsdisf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatunsdisf(0x0000000000000000, 0x00000000); + status |= test__floatunsdisf(0x0000000000000001, 0x3f800000); + status |= test__floatunsdisf(0x0000000008000000, 0x4d000000); + status |= test__floatunsdisf(0x0000000008000004, 0x4d000000); + status |= test__floatunsdisf(0x0000000008000008, 0x4d000000); + status |= test__floatunsdisf(0x000000000800000c, 0x4d000001); + status |= test__floatunsdisf(0x0000000008000010, 0x4d000001); + status |= test__floatunsdisf(0x0000000008000014, 0x4d000001); + status |= test__floatunsdisf(0x0000000008000018, 0x4d000002); + status |= test__floatunsdisf(0x000000000800001c, 0x4d000002); + status |= test__floatunsdisf(0x0000082345000000, 0x55023450); + status |= test__floatunsdisf(0x4000004000000001, 0x5e800001); + status |= test__floatunsdisf(0x8000000000000000, 0x5f000000); + status |= test__floatunsdisf(0x8000008000000000, 0x5f000000); + status |= test__floatunsdisf(0xffffffffffffffff, 0x5f800000); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatunssidfnew_test.c b/compiler-rt/test/builtins/Unit/floatunssidfnew_test.c new file mode 100644 index 0000000000000..07e1ad3928846 --- /dev/null +++ 
b/compiler-rt/test/builtins/Unit/floatunssidfnew_test.c @@ -0,0 +1,41 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatunssidf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from uint32_t to double +COMPILER_RT_ABI double __floatunssidf(uint32_t a); + +int test__floatunssidf(int line, uint32_t a, uint64_t expected_rep) { + double x = __floatunssidf(a); + int ret = compareResultD(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatunssidf(%08" PRIx32 ") = %016" PRIx64 + ", expected %016" PRIx64 "\n", + line, a, toRep64(x), expected_rep); + } + return ret; +} + +#define test__floatunssidf(a,x) test__floatunssidf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatunssidf(0x00000000, 0x0000000000000000); + status |= test__floatunssidf(0x00000001, 0x3ff0000000000000); + status |= test__floatunssidf(0x80000400, 0x41e0000080000000); + status |= test__floatunssidf(0x80000800, 0x41e0000100000000); + status |= test__floatunssidf(0xffffffff, 0x41efffffffe00000); + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/floatunssisfnew_test.c b/compiler-rt/test/builtins/Unit/floatunssisfnew_test.c new file mode 100644 index 0000000000000..5ec0cc7426ab0 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/floatunssisfnew_test.c @@ -0,0 +1,55 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_floatunssisf + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// Returns: a converted from uint32_t to float +COMPILER_RT_ABI float __floatunssisf(uint32_t a); + +int test__floatunssisf(int line, uint32_t a, uint32_t expected_rep) { + float x = __floatunssisf(a); + int ret = compareResultF(x, expected_rep); + + if (ret) { + printf("error at line %d: __floatunssisf(%08" PRIx32 ") = %08" PRIx32 + ", expected %08" PRIx32 "\n", + line, a, toRep32(x), expected_rep); + } + return ret; +} + +#define test__floatunssisf(a,x) test__floatunssisf(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__floatunssisf(0x00000000, 0x00000000); + status |= test__floatunssisf(0x00000001, 0x3f800000); + status |= test__floatunssisf(0x08000000, 0x4d000000); + status |= test__floatunssisf(0x08000004, 0x4d000000); + status |= test__floatunssisf(0x08000008, 0x4d000000); + status |= test__floatunssisf(0x0800000c, 0x4d000001); + status |= test__floatunssisf(0x08000010, 0x4d000001); + status |= test__floatunssisf(0x08000014, 0x4d000001); + status |= test__floatunssisf(0x08000018, 0x4d000002); + status |= test__floatunssisf(0x0800001c, 0x4d000002); + status |= test__floatunssisf(0xfffffe00, 0x4f7ffffe); + status |= test__floatunssisf(0xfffffe7f, 0x4f7ffffe); + status |= test__floatunssisf(0xfffffe80, 0x4f7ffffe); + status |= test__floatunssisf(0xfffffe81, 0x4f7fffff); + status |= test__floatunssisf(0xffffff00, 0x4f7fffff); + status |= test__floatunssisf(0xffffff7f, 0x4f7fffff); + status |= test__floatunssisf(0xffffff80, 0x4f800000); + status |= test__floatunssisf(0xffffff81, 0x4f800000); + status |= test__floatunssisf(0xffffffff, 0x4f800000); + + return status; +} _______________________________________________ llvm-branch-commits mailing list [email protected] 
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
