atrosinenko updated this revision to Diff 288623. atrosinenko added a comment.
Add some other explanations. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D85031/new/ https://reviews.llvm.org/D85031 Files: compiler-rt/lib/builtins/divdf3.c compiler-rt/lib/builtins/divsf3.c compiler-rt/lib/builtins/divtf3.c compiler-rt/lib/builtins/fp_div_impl.inc compiler-rt/lib/builtins/fp_lib.h compiler-rt/lib/builtins/int_util.h compiler-rt/test/builtins/Unit/divdf3_test.c
Index: compiler-rt/test/builtins/Unit/divdf3_test.c =================================================================== --- compiler-rt/test/builtins/Unit/divdf3_test.c +++ compiler-rt/test/builtins/Unit/divdf3_test.c @@ -92,5 +92,13 @@ if (test__divdf3(0x1.0p+0, 0x1.00000001p+0, UINT64_C(0x3fefffffffe00000))) return 1; + // some misc test cases obtained by fuzzing against h/w implementation + if (test__divdf3(0x1.fdc239dd64735p-658, -0x1.fff9364c0843fp-948, UINT64_C(0xd20fdc8fc0ceffb1))) + return 1; + if (test__divdf3(-0x1.78abb261d47c8p+794, 0x1.fb01d537cc5aep+266, UINT64_C(0xe0e7c6148ffc23e3))) + return 1; + if (test__divdf3(-0x1.da7dfe6048b8bp-875, 0x1.ffc7ea3ff60a4p-610, UINT64_C(0xaf5dab1fe0269e2a))) + return 1; + return 0; } Index: compiler-rt/lib/builtins/int_util.h =================================================================== --- compiler-rt/lib/builtins/int_util.h +++ compiler-rt/lib/builtins/int_util.h @@ -28,4 +28,20 @@ #define COMPILE_TIME_ASSERT2(expr, cnt) \ typedef char ct_assert_##cnt[(expr) ? 1 : -1] UNUSED +// Force unrolling the code specified to be repeated N times. 
+#define REPEAT_0_TIMES(code_to_repeat) /* do nothing */ +#define REPEAT_1_TIMES(code_to_repeat) code_to_repeat +#define REPEAT_2_TIMES(code_to_repeat) \ + REPEAT_1_TIMES(code_to_repeat) \ + code_to_repeat +#define REPEAT_3_TIMES(code_to_repeat) \ + REPEAT_2_TIMES(code_to_repeat) \ + code_to_repeat +#define REPEAT_4_TIMES(code_to_repeat) \ + REPEAT_3_TIMES(code_to_repeat) \ + code_to_repeat + +#define REPEAT_N_TIMES_(N, code_to_repeat) REPEAT_##N##_TIMES(code_to_repeat) +#define REPEAT_N_TIMES(N, code_to_repeat) REPEAT_N_TIMES_(N, code_to_repeat) + #endif // INT_UTIL_H Index: compiler-rt/lib/builtins/fp_lib.h =================================================================== --- compiler-rt/lib/builtins/fp_lib.h +++ compiler-rt/lib/builtins/fp_lib.h @@ -40,9 +40,12 @@ #if defined SINGLE_PRECISION +typedef uint16_t half_rep_t; typedef uint32_t rep_t; +typedef uint64_t twice_rep_t; typedef int32_t srep_t; typedef float fp_t; +#define HALF_REP_C UINT16_C #define REP_C UINT32_C #define significandBits 23 @@ -58,9 +61,11 @@ #elif defined DOUBLE_PRECISION +typedef uint32_t half_rep_t; typedef uint64_t rep_t; typedef int64_t srep_t; typedef double fp_t; +#define HALF_REP_C UINT32_C #define REP_C UINT64_C #define significandBits 52 @@ -102,9 +107,11 @@ #elif defined QUAD_PRECISION #if __LDBL_MANT_DIG__ == 113 && defined(__SIZEOF_INT128__) #define CRT_LDBL_128BIT +typedef uint64_t half_rep_t; typedef __uint128_t rep_t; typedef __int128_t srep_t; typedef long double fp_t; +#define HALF_REP_C UINT64_C #define REP_C (__uint128_t) // Note: Since there is no explicit way to tell compiler the constant is a // 128-bit integer, we let the constant be casted to 128-bit integer Index: compiler-rt/lib/builtins/fp_div_impl.inc =================================================================== --- /dev/null +++ compiler-rt/lib/builtins/fp_div_impl.inc @@ -0,0 +1,389 @@ +//===-- fp_div_impl.inc - Floating point division -----------------*- C -*-===// +// +// Part of the LLVM Project, 
under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float division with the IEEE-754 default +// rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +// The __divXf3__ function implements Newton-Raphson floating point division. +// It uses 3 iterations for float32, 4 for float64 and 5 for float128, +// respectively. Due to number of significant bits being roughly doubled +// every iteration, the two modes are supported: N full-width iterations (as +// it is done for float32 by default) and (N-1) half-width iteration plus one +// final full-width iteration. It is expected that half-width integer +// operations (w.r.t rep_t size) can be performed faster for some hardware but +// they require error estimations to be computed separately due to larger +// computational errors caused by truncating intermediate results. + +// Half the bit-size of rep_t +#define HW (typeWidth / 2) +// rep_t-sized bitmask with lower half of bits set to ones +#define loMask (REP_C(-1) >> HW) + +#define NUMBER_OF_ITERATIONS \ + (NUMBER_OF_HALF_ITERATIONS + NUMBER_OF_FULL_ITERATIONS) + +#if NUMBER_OF_FULL_ITERATIONS < 1 +#error At least one full iteration is required +#endif + +static __inline fp_t __divXf3__(fp_t a, fp_t b) { + + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. 
+ if (aExponent - 1U >= maxExponent - 1U || + bExponent - 1U >= maxExponent - 1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) + return fromRep(qnanRep); + // infinity / anything else = +/- infinity + else + return fromRep(aAbs | quotientSign); + } + + // anything else / infinity = +/- 0 + if (bAbs == infRep) + return fromRep(quotientSign); + + if (!aAbs) { + // zero / zero = NaN + if (!bAbs) + return fromRep(qnanRep); + // zero / anything else = +/- zero + else + return fromRep(quotientSign); + } + // anything else / zero = +/- infinity + if (!bAbs) + return fromRep(infRep | quotientSign); + + // One or both of a or b is denormal. The other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) + scale += normalize(&aSignificand); + if (bAbs < implicitBit) + scale -= normalize(&bSignificand); + } + + // Set the implicit significand bit. If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything. + aSignificand |= implicitBit; + bSignificand |= implicitBit; + + int writtenExponent = (aExponent - bExponent + scale) + exponentBias; + + const rep_t b_UQ1 = bSignificand << (typeWidth - significandBits - 1); + + // Align the significand of b as a UQ1.(n-1) fixed-point number in the range + // [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax + // polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2. 
+ // Analytically for infinitely precise computations, for b in [1, 2): + // abs(x0(b) - 1/b) <= 3/4 - 1/sqrt(2) + // Computationally, the initial approximation is between x0(1.0) + // (about 0.9571) and x0(2.0) (about 0.4571). + + // Then, refine the reciprocal estimate using a Newton-Raphson iteration: + // x_{n+1} = x_n * (2 - x_n * b) + // + // Let b be the original divisor considered "in infinite precision" and + // obtained from IEEE754 representation of function argument (with the + // implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in + // UQ1.(W-1). + // + // Let b_hw be an infinitely precise number obtained from the highest (HW-1) + // bits of divisor significand (with the implicit bit set). Corresponds to + // half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated** + // version of b_UQ1. + // + // Let e_n := x_n - 1/b_hw + // E_n := x_n - 1/b + // abs(E_n) <= abs(e_n) + (1/b_hw - 1/b) + // = abs(e_n) + (b - b_hw) / (b*b_hw) + // <= abs(e_n) + 2 * 2^-HW + + // rep_t-sized iterations may be slower than the corresponding half-width + // variant depending on the hardware and whether single/double/quad precision + // is selected. + // NB: Using half-width iterations increases computation errors due to + // rounding, so error estimations have to be computed taking the selected + // mode into account! +#if NUMBER_OF_HALF_ITERATIONS > 0 + // Starting with (n-1) half-width iterations + const half_rep_t b_UQ1_hw = bSignificand >> (significandBits + 1 - HW); + + // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW + // with W0 being either 16 or 32 and W0 <= HW. + // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which + // b/2 is subtracted to obtain x0) wrapped to [0, 1) range. +#if defined(SINGLE_PRECISION) + // Use 16-bit initial estimation in case we are using half-width iterations + // for float32 division. This is expected to be useful for some 16-bit + // targets.
Not used by default as it requires performing more work during + // rounding and would hardly help on regular 32- or 64-bit targets. + const half_rep_t C_hw = HALF_REP_C(0x7504); +#else + // HW is at least 32. Shifting into the highest bits if needed. + const half_rep_t C_hw = HALF_REP_C(0x7504F333) << (HW - 32); +#endif + + // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572, + // so x0 fits to UQ0.HW without wrapping. + half_rep_t x_UQ0_hw = C_hw - (b_UQ1_hw /* exact b_hw/2 as UQ0.HW */); + // An e_0 error is comprised of errors due to + // * x0 being an inherently imprecise first approximation of 1/b_hw + // * C_hw being some (irrational) number **truncated** to W0 bits + // Please note that e_0 is calculated against the infinitely precise + // reciprocal of b_hw (that is, **truncated** version of b). + // + // e_0 <= 3/4 - 1/sqrt(2) + 2^-W0 + + // By construction, 1 <= b < 2 + // f(x) = x * (2 - b*x) = 2*x - b*x^2 + // f'(x) = 2 * (1 - b*x) + // + // On the (0, 1) interval, f(0) = 0, + // then it increases until f(1/b) = 1 / b, maximum on (0, 1), + // then it decreases to f(1) = 2 - b + REPEAT_N_TIMES(NUMBER_OF_HALF_ITERATIONS, { + // corr_UQ1_hw can be **larger** than 2 - b*x by at most 1*Ulp of corr_UQ1_hw. + // "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1). + // On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided + // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is + // expected to be strictly positive because b_UQ1_hw has its highest bit set + // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). + half_rep_t corr_UQ1_hw = 0 - ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW); + + // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally + // obtaining an UQ1.(HW-1) number and proving its highest bit could be + // considered to be 0 to be able to represent it in UQ0.HW.
+ // From the above analysis of f(x), if corr_UQ1_hw would be represented + // without any intermediate loss of precision (that is, in twice_rep_t) + // x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly + // less otherwise. On the other hand, to obtain [1.]000..., one has to pass + // 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow. + // The fact corr_UQ1_hw was virtually rounded up (due to result of + // multiplication being **first** truncated, then negated) can increase + // x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. + x_UQ0_hw = (rep_t)x_UQ0_hw * corr_UQ1_hw >> (HW - 1); + // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t + // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after + // any number of iterations, so just subtract 2 from the reciprocal + // approximation after last iteration. + + // In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW: + // corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1 + // = 1 - e_n * b_hw + 2*eps1 + // x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2 + // = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2 + // = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw + // \------ >0 -------/ \-- >0 ---/ + // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2 * U, U + 2*e_n^2) + }) + // For initial half-width iterations, U = 2^-HW + // Let abs(e_n) <= u_n * U, + // then abs(e_{n+1}) <= U * [2*u_n*U + max(2, 1 + 2*u_n^2*U)] + // u_{n+1} <= 2 * u_n * U + max(2, 1 + 2 * u_n^2 * U) + + // Account for possible overflow (see above) before proceeding with full-width + // iterations because the condition b == 1.0 may become false here if b is + // *close enough* to 1.0.
+ x_UQ0_hw -= 1U; + rep_t x_UQ0 = (rep_t)x_UQ0_hw << HW; + x_UQ0 -= 1U; + +#else + // C is (3/4 + 1/sqrt(2)) - 1 truncated to 32 fractional bits as UQ0.n + const rep_t C = REP_C(0x7504F333) << (typeWidth - 32); + rep_t x_UQ0 = C - b_UQ1; + // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-32 +#endif + + // Error estimations for full-precision iterations are calculated + // just as above, but with U := 2^-W. We need at least one such iteration. + +#ifdef USE_NATIVE_FULL_ITERATIONS + REPEAT_N_TIMES(NUMBER_OF_FULL_ITERATIONS, { + rep_t corr_UQ1 = 0 - ((twice_rep_t)x_UQ0 * b_UQ1 >> typeWidth); + x_UQ0 = (twice_rep_t)x_UQ0 * corr_UQ1 >> (typeWidth - 1); + }) +#else +#if NUMBER_OF_FULL_ITERATIONS != 1 +#error Only a single emulated full iteration is supported +#endif +#if !(NUMBER_OF_HALF_ITERATIONS > 0) + // Cannot normally reach here: only one full-width iteration is requested and + // the total number of iterations should be at least 3 even for float32. +#error Check NUMBER_OF_HALF_ITERATIONS, NUMBER_OF_FULL_ITERATIONS and USE_NATIVE_FULL_ITERATIONS. +#endif + // Simulating operations on a twice_rep_t to perform a single final full-width + // iteration. Using ad-hoc multiplication implementations to take advantage + // of particular structure of operands. + rep_t blo = b_UQ1 & loMask; + // x_UQ0 = x_UQ0_hw * 2^HW - 1 + // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1 + // + // <--- higher half ---><--- lower half ---> + // [x_UQ0_hw * b_UQ1_hw] + // + [ x_UQ0_hw * blo ] + // - [ b_UQ1 ] + // = [ result ][.... discarded ...] 
+ rep_t corr_UQ1 = 0 - ( (rep_t)x_UQ0_hw * b_UQ1_hw + + ((rep_t)x_UQ0_hw * blo >> HW) + - REP_C(1)); // account for *possible* carry + rep_t lo_corr = corr_UQ1 & loMask; + rep_t hi_corr = corr_UQ1 >> HW; + // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1 + x_UQ0 = ((rep_t)x_UQ0_hw * hi_corr << 1) + + ((rep_t)x_UQ0_hw * lo_corr >> (HW - 1)) + - REP_C(2); // 1 to account for the highest bit of corr_UQ1 can be 1 + // 1 to account for possible carry + // Just like the case of half-width iterations but with possibility + // of overflowing by one extra Ulp of x_UQ0. + x_UQ0 -= 1U; + // ... and then traditional fixup by 2 should work + + // On error estimation: + // abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW + // + (2^-HW + 2^-W)) + // abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW + + // Then like for the half-width iterations: + // With 0 <= eps1 < 2^-W + // E_N = -E_{N-1}^2*b + 4*eps1/b + 4*E_{N-1}*eps1 - (1+2+1)*eps2 + // = 4*E_{N-1}*eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4*eps1/b + // E_N <= 2^-W * [ 4*(u_{N-1} + 3.01) * 2^-HW + 4 + 2 * (e_{N-1} + 3.01)^2 ] +#endif + + // Finally, account for possible overflow, as explained above. 
+ x_UQ0 -= 2U; + + // u_n for different precisions (with N-1 half-width iterations): + // W0 is the precision of C + // u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW + 1 + + // | f32 | f64 | f128 + // Iterations | | | + // 0 | < 2813.1 | < 184224974 | < 791240229949381501 + // 1 | < 242.7 | < 15804007 | < 67877680634450550 + // 2 | < 2.81 | < 116308 | < 499533089406164 + // 3 | | < 7.31 | < 27054455403 + // 4 | | | < 80.4 + + // Final error | < 74 / 2^32 | < 220 / 2^64 | < 13921 * 2^-128 + +#if defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 2 +#define RECIPROCAL_PRECISION REP_C(74) +#elif defined(SINGLE_PRECISION) && NUMBER_OF_ITERATIONS == 3 +#define RECIPROCAL_PRECISION REP_C(8) +#elif defined(DOUBLE_PRECISION) && NUMBER_OF_ITERATIONS == 4 +#define RECIPROCAL_PRECISION REP_C(220) +#elif defined(QUAD_PRECISION) && NUMBER_OF_ITERATIONS == 5 +#define RECIPROCAL_PRECISION REP_C(13921) +#else +#error Invalid number of iterations +#endif + + // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W + x_UQ0 -= RECIPROCAL_PRECISION; + // Now 1/b - (2*P) * 2^-W < x < 1/b + // FIXME Is x_UQ0 still >= 0.5? + + rep_t quotient_UQ1, dummy; + wideMultiply(x_UQ0, aSignificand << 1, &quotient_UQ1, &dummy); + // Now, a/b - 4*P * 2^-W < q < a/b + + // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1), adjust it to be in [1.0, 2.0) as UQ1.SB + rep_t residualLo; + if (quotient_UQ1 < (implicitBit << 1)) { + residualLo = (aSignificand << (significandBits + 1)) - quotient_UQ1 * bSignificand; + writtenExponent -= 1; + + // the error is doubled + } else { + quotient_UQ1 >>= 1; + residualLo = (aSignificand << significandBits) - quotient_UQ1 * bSignificand; + } + // Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB + // Each NextAfter() increments the floating point value by at least 2^-SB + // (more, if exponent was incremented).
+ // Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint): + // q + // | | * | | | | | + // <---> 2^t + // | | | | | * | | + // q + // To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB. + // (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB <=> (8*P) * 2^-W < 0.5 * 2^-SB <=> P < 2^(W-4-SB) + // Generally, for at most R NextAfter(), the P < (2*R - 1) * 2^(W-4-SB) + // For f32: 8 < 32 (OK) but 74 > 32 (but two NextAfter() are enough) + // For f64: 220 < 256 (OK) + // For f128: 13921 > 4096 (*three* NextAfter() are required) + + // If we have overflowed the exponent, return infinity + if (writtenExponent >= maxExponent) + return fromRep(infRep | quotientSign); + + // Now, quotient_UQ1_SB <= the correctly-rounded result + // and may need taking NextAfter() up to 3 times (see error estimates above) + // r = a - b * q + + if (writtenExponent < 0) { + // Result is definitely subnormal, flushing to zero + return fromRep(quotientSign); + } + + // Clear the implicit bit + rep_t absResult = quotient_UQ1 & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + + // Round + residualLo <<= 1; + residualLo += absResult & 1; // tie to even + absResult += residualLo > bSignificand; +#if defined(QUAD_PRECISION) || (defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS > 0) + // Do not round Infinity to NaN + absResult += absResult < infRep && residualLo > (2 + 1) * bSignificand; +#endif +#if defined(QUAD_PRECISION) + absResult += absResult < infRep && residualLo > (4 + 1) * bSignificand; +#endif + + if ((absResult & ~significandMask) == 0) { + // Result is subnormal, flushing to zero + return fromRep(quotientSign); + } + // Result is normal, insert the sign and return + return fromRep(absResult | quotientSign); +} Index: compiler-rt/lib/builtins/divtf3.c =================================================================== --- compiler-rt/lib/builtins/divtf3.c +++ compiler-rt/lib/builtins/divtf3.c
@@ -9,213 +9,18 @@ // This file implements quad-precision soft-float division // with the IEEE-754 default rounding (to nearest, ties to even). // -// For simplicity, this implementation currently flushes denormals to zero. -// It should be a fairly straightforward exercise to implement gradual -// underflow with correct rounding. -// //===----------------------------------------------------------------------===// #define QUAD_PRECISION #include "fp_lib.h" #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { - - const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; - const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; - const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; - - rep_t aSignificand = toRep(a) & significandMask; - rep_t bSignificand = toRep(b) & significandMask; - int scale = 0; - - // Detect if a or b is zero, denormal, infinity, or NaN. - if (aExponent - 1U >= maxExponent - 1U || - bExponent - 1U >= maxExponent - 1U) { - - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - - // NaN / anything = qNaN - if (aAbs > infRep) - return fromRep(toRep(a) | quietBit); - // anything / NaN = qNaN - if (bAbs > infRep) - return fromRep(toRep(b) | quietBit); - - if (aAbs == infRep) { - // infinity / infinity = NaN - if (bAbs == infRep) - return fromRep(qnanRep); - // infinity / anything else = +/- infinity - else - return fromRep(aAbs | quotientSign); - } - - // anything else / infinity = +/- 0 - if (bAbs == infRep) - return fromRep(quotientSign); - - if (!aAbs) { - // zero / zero = NaN - if (!bAbs) - return fromRep(qnanRep); - // zero / anything else = +/- zero - else - return fromRep(quotientSign); - } - // anything else / zero = +/- infinity - if (!bAbs) - return fromRep(infRep | quotientSign); - - // One or both of a or b is denormal. The other (if applicable) is a - // normal number. 
Renormalize one or both of a and b, and set scale to - // include the necessary exponent adjustment. - if (aAbs < implicitBit) - scale += normalize(&aSignificand); - if (bAbs < implicitBit) - scale -= normalize(&bSignificand); - } - - // Set the implicit significand bit. If we fell through from the - // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything. - aSignificand |= implicitBit; - bSignificand |= implicitBit; - int quotientExponent = aExponent - bExponent + scale; - - // Align the significand of b as a Q63 fixed-point number in the range - // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax - // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This - // is accurate to about 3.5 binary digits. - const uint64_t q63b = bSignificand >> 49; - uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b; - // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2) - - // Now refine the reciprocal estimate using a Newton-Raphson iteration: - // - // x1 = x0 * (2 - x0 * b) - // - // This doubles the number of correct binary digits in the approximation - // with each iteration. - uint64_t correction64; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - - // The reciprocal may have overflowed to zero if the upper half of b is - // exactly 1.0. This would sabatoge the full-width final stage of the - // computation that follows, so we adjust the reciprocal down by one bit. 
- recip64--; - - // We need to perform one more iteration to get us to 112 binary digits; - // The last iteration needs to happen with extra precision. - const uint64_t q127blo = bSignificand << 15; - rep_t correction, reciprocal; - - // NOTE: This operation is equivalent to __multi3, which is not implemented - // in some architechure - rep_t r64q63, r64q127, r64cH, r64cL, dummy; - wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63); - wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127); - - correction = -(r64q63 + (r64q127 >> 64)); - - uint64_t cHi = correction >> 64; - uint64_t cLo = correction; - - wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH); - wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL); - - reciprocal = r64cH + (r64cL >> 64); - - // Adjust the final 128-bit reciprocal estimate downward to ensure that it - // is strictly smaller than the infinitely precise exact reciprocal. Because - // the computation of the Newton-Raphson step is truncating at every step, - // this adjustment is small; most of the work is already done. - reciprocal -= 2; - - // The numerical reciprocal is accurate to within 2^-112, lies in the - // interval [0.5, 1.0), and is strictly smaller than the true reciprocal - // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b - // in Q127 with the following properties: - // - // 1. q < a/b - // 2. q is in the interval [0.5, 2.0) - // 3. The error in q is bounded away from 2^-113 (actually, we have a - // couple of bits to spare, but this is all we need). - - // We need a 128 x 128 multiply high to compute q, which isn't a basic - // operation in C, so we need to be a little bit fussy. - rep_t quotient, quotientLo; - wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo); - - // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
- // In either case, we are going to compute a residual of the form - // - // r = a - q*b - // - // We know from the construction of q that r satisfies: - // - // 0 <= r < ulp(q)*b - // - // If r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we - // already have the correct result. The exact halfway case cannot occur. - // We also take this time to right shift quotient if it falls in the [1,2) - // range and adjust the exponent accordingly. - rep_t residual; - rep_t qb; - if (quotient < (implicitBit << 1)) { - wideMultiply(quotient, bSignificand, &dummy, &qb); - residual = (aSignificand << 113) - qb; - quotientExponent--; - } else { - quotient >>= 1; - wideMultiply(quotient, bSignificand, &dummy, &qb); - residual = (aSignificand << 112) - qb; - } +#define NUMBER_OF_HALF_ITERATIONS 4 +#define NUMBER_OF_FULL_ITERATIONS 1 - const int writtenExponent = quotientExponent + exponentBias; +#include "fp_div_impl.inc" - if (writtenExponent >= maxExponent) { - // If we have overflowed the exponent, return infinity. - return fromRep(infRep | quotientSign); - } else if (writtenExponent < 1) { - if (writtenExponent == 0) { - // Check whether the rounded result is normal. - const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit. - rep_t absResult = quotient & significandMask; - // Round. - absResult += round; - if (absResult & ~significandMask) { - // The rounded result is normal; return it. - return fromRep(absResult | quotientSign); - } - } - // Flush denormals to zero. In the future, it would be nice to add - // code to round them correctly. - return fromRep(quotientSign); - } else { - const bool round = (residual << 1) >= bSignificand; - // Clear the implicit bit. - rep_t absResult = quotient & significandMask; - // Insert the exponent. - absResult |= (rep_t)writtenExponent << significandBits; - // Round. - absResult += round; - // Insert the sign and return. 
- const fp_t result = fromRep(absResult | quotientSign); - return result; - } -} +COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { return __divXf3__(a, b); } #endif Index: compiler-rt/lib/builtins/divsf3.c =================================================================== --- compiler-rt/lib/builtins/divsf3.c +++ compiler-rt/lib/builtins/divsf3.c @@ -9,181 +9,17 @@ // This file implements single-precision soft-float division // with the IEEE-754 default rounding (to nearest, ties to even). // -// For simplicity, this implementation currently flushes denormals to zero. -// It should be a fairly straightforward exercise to implement gradual -// underflow with correct rounding. -// //===----------------------------------------------------------------------===// #define SINGLE_PRECISION -#include "fp_lib.h" - -COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { - - const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; - const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; - const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; - - rep_t aSignificand = toRep(a) & significandMask; - rep_t bSignificand = toRep(b) & significandMask; - int scale = 0; - - // Detect if a or b is zero, denormal, infinity, or NaN. 
- if (aExponent - 1U >= maxExponent - 1U || - bExponent - 1U >= maxExponent - 1U) { - - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - - // NaN / anything = qNaN - if (aAbs > infRep) - return fromRep(toRep(a) | quietBit); - // anything / NaN = qNaN - if (bAbs > infRep) - return fromRep(toRep(b) | quietBit); - - if (aAbs == infRep) { - // infinity / infinity = NaN - if (bAbs == infRep) - return fromRep(qnanRep); - // infinity / anything else = +/- infinity - else - return fromRep(aAbs | quotientSign); - } - - // anything else / infinity = +/- 0 - if (bAbs == infRep) - return fromRep(quotientSign); - - if (!aAbs) { - // zero / zero = NaN - if (!bAbs) - return fromRep(qnanRep); - // zero / anything else = +/- zero - else - return fromRep(quotientSign); - } - // anything else / zero = +/- infinity - if (!bAbs) - return fromRep(infRep | quotientSign); - - // One or both of a or b is denormal. The other (if applicable) is a - // normal number. Renormalize one or both of a and b, and set scale to - // include the necessary exponent adjustment. - if (aAbs < implicitBit) - scale += normalize(&aSignificand); - if (bAbs < implicitBit) - scale -= normalize(&bSignificand); - } - - // Set the implicit significand bit. If we fell through from the - // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything. - aSignificand |= implicitBit; - bSignificand |= implicitBit; - int quotientExponent = aExponent - bExponent + scale; - // 0x7504F333 / 2^32 + 1 = 3/4 + 1/sqrt(2) - - // Align the significand of b as a Q31 fixed-point number in the range - // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax - // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This - // is accurate to about 3.5 binary digits. 
- uint32_t q31b = bSignificand << 8; - uint32_t reciprocal = UINT32_C(0x7504f333) - q31b; - - // Now refine the reciprocal estimate using a Newton-Raphson iteration: - // - // x1 = x0 * (2 - x0 * b) - // - // This doubles the number of correct binary digits in the approximation - // with each iteration. - uint32_t correction; - correction = -((uint64_t)reciprocal * q31b >> 32); - reciprocal = (uint64_t)reciprocal * correction >> 31; - correction = -((uint64_t)reciprocal * q31b >> 32); - reciprocal = (uint64_t)reciprocal * correction >> 31; - correction = -((uint64_t)reciprocal * q31b >> 32); - reciprocal = (uint64_t)reciprocal * correction >> 31; - - // Adust the final 32-bit reciprocal estimate downward to ensure that it is - // strictly smaller than the infinitely precise exact reciprocal. Because - // the computation of the Newton-Raphson step is truncating at every step, - // this adjustment is small; most of the work is already done. - reciprocal -= 2; - - // The numerical reciprocal is accurate to within 2^-28, lies in the - // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller - // than the true reciprocal of b. Multiplying a by this reciprocal thus - // gives a numerical q = a/b in Q24 with the following properties: - // - // 1. q < a/b - // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0) - // 3. The error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes - // from the fact that we truncate the product, and the 2^27 term - // is the error in the reciprocal of b scaled by the maximum - // possible value of a. As a consequence of this error bound, - // either q or nextafter(q) is the correctly rounded. - rep_t quotient = (uint64_t)reciprocal * (aSignificand << 1) >> 32; - - // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). 
- // In either case, we are going to compute a residual of the form - // - // r = a - q*b - // - // We know from the construction of q that r satisfies: - // - // 0 <= r < ulp(q)*b - // - // If r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we - // already have the correct result. The exact halfway case cannot occur. - // We also take this time to right shift quotient if it falls in the [1,2) - // range and adjust the exponent accordingly. - rep_t residual; - if (quotient < (implicitBit << 1)) { - residual = (aSignificand << 24) - quotient * bSignificand; - quotientExponent--; - } else { - quotient >>= 1; - residual = (aSignificand << 23) - quotient * bSignificand; - } - - const int writtenExponent = quotientExponent + exponentBias; - if (writtenExponent >= maxExponent) { - // If we have overflowed the exponent, return infinity. - return fromRep(infRep | quotientSign); - } +#define NUMBER_OF_HALF_ITERATIONS 0 +#define NUMBER_OF_FULL_ITERATIONS 3 +#define USE_NATIVE_FULL_ITERATIONS - else if (writtenExponent < 1) { - if (writtenExponent == 0) { - // Check whether the rounded result is normal. - const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit. - rep_t absResult = quotient & significandMask; - // Round. - absResult += round; - if (absResult & ~significandMask) { - // The rounded result is normal; return it. - return fromRep(absResult | quotientSign); - } - } - // Flush denormals to zero. In the future, it would be nice to add - // code to round them correctly. - return fromRep(quotientSign); - } +#include "fp_div_impl.inc" - else { - const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit. - rep_t absResult = quotient & significandMask; - // Insert the exponent. - absResult |= (rep_t)writtenExponent << significandBits; - // Round. - absResult += round; - // Insert the sign and return. 
- return fromRep(absResult | quotientSign); - } -} +COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { return __divXf3__(a, b); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) Index: compiler-rt/lib/builtins/divdf3.c =================================================================== --- compiler-rt/lib/builtins/divdf3.c +++ compiler-rt/lib/builtins/divdf3.c @@ -9,197 +9,16 @@ // This file implements double-precision soft-float division // with the IEEE-754 default rounding (to nearest, ties to even). // -// For simplicity, this implementation currently flushes denormals to zero. -// It should be a fairly straightforward exercise to implement gradual -// underflow with correct rounding. -// //===----------------------------------------------------------------------===// #define DOUBLE_PRECISION -#include "fp_lib.h" - -COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { - - const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; - const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; - const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; - - rep_t aSignificand = toRep(a) & significandMask; - rep_t bSignificand = toRep(b) & significandMask; - int scale = 0; - - // Detect if a or b is zero, denormal, infinity, or NaN. 
- if (aExponent - 1U >= maxExponent - 1U || - bExponent - 1U >= maxExponent - 1U) { - - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - - // NaN / anything = qNaN - if (aAbs > infRep) - return fromRep(toRep(a) | quietBit); - // anything / NaN = qNaN - if (bAbs > infRep) - return fromRep(toRep(b) | quietBit); - - if (aAbs == infRep) { - // infinity / infinity = NaN - if (bAbs == infRep) - return fromRep(qnanRep); - // infinity / anything else = +/- infinity - else - return fromRep(aAbs | quotientSign); - } - - // anything else / infinity = +/- 0 - if (bAbs == infRep) - return fromRep(quotientSign); - - if (!aAbs) { - // zero / zero = NaN - if (!bAbs) - return fromRep(qnanRep); - // zero / anything else = +/- zero - else - return fromRep(quotientSign); - } - // anything else / zero = +/- infinity - if (!bAbs) - return fromRep(infRep | quotientSign); - - // One or both of a or b is denormal. The other (if applicable) is a - // normal number. Renormalize one or both of a and b, and set scale to - // include the necessary exponent adjustment. - if (aAbs < implicitBit) - scale += normalize(&aSignificand); - if (bAbs < implicitBit) - scale -= normalize(&bSignificand); - } - - // Set the implicit significand bit. If we fell through from the - // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything. - aSignificand |= implicitBit; - bSignificand |= implicitBit; - int quotientExponent = aExponent - bExponent + scale; - - // Align the significand of b as a Q31 fixed-point number in the range - // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax - // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This - // is accurate to about 3.5 binary digits. 
- const uint32_t q31b = bSignificand >> 21; - uint32_t recip32 = UINT32_C(0x7504f333) - q31b; - // 0x7504F333 / 2^32 + 1 = 3/4 + 1/sqrt(2) - - // Now refine the reciprocal estimate using a Newton-Raphson iteration: - // - // x1 = x0 * (2 - x0 * b) - // - // This doubles the number of correct binary digits in the approximation - // with each iteration. - uint32_t correction32; - correction32 = -((uint64_t)recip32 * q31b >> 32); - recip32 = (uint64_t)recip32 * correction32 >> 31; - correction32 = -((uint64_t)recip32 * q31b >> 32); - recip32 = (uint64_t)recip32 * correction32 >> 31; - correction32 = -((uint64_t)recip32 * q31b >> 32); - recip32 = (uint64_t)recip32 * correction32 >> 31; - - // The reciprocal may have overflowed to zero if the upper half of b is - // exactly 1.0. This would sabatoge the full-width final stage of the - // computation that follows, so we adjust the reciprocal down by one bit. - recip32--; - - // We need to perform one more iteration to get us to 56 binary digits. - // The last iteration needs to happen with extra precision. - const uint32_t q63blo = bSignificand << 11; - uint64_t correction, reciprocal; - correction = -((uint64_t)recip32 * q31b + ((uint64_t)recip32 * q63blo >> 32)); - uint32_t cHi = correction >> 32; - uint32_t cLo = correction; - reciprocal = (uint64_t)recip32 * cHi + ((uint64_t)recip32 * cLo >> 32); - - // Adjust the final 64-bit reciprocal estimate downward to ensure that it is - // strictly smaller than the infinitely precise exact reciprocal. Because - // the computation of the Newton-Raphson step is truncating at every step, - // this adjustment is small; most of the work is already done. - reciprocal -= 2; - - // The numerical reciprocal is accurate to within 2^-56, lies in the - // interval [0.5, 1.0), and is strictly smaller than the true reciprocal - // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b - // in Q53 with the following properties: - // - // 1. q < a/b - // 2. 
q is in the interval [0.5, 2.0) - // 3. The error in q is bounded away from 2^-53 (actually, we have a - // couple of bits to spare, but this is all we need). - - // We need a 64 x 64 multiply high to compute q, which isn't a basic - // operation in C, so we need to be a little bit fussy. - rep_t quotient, quotientLo; - wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo); - - // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). - // In either case, we are going to compute a residual of the form - // - // r = a - q*b - // - // We know from the construction of q that r satisfies: - // - // 0 <= r < ulp(q)*b - // - // If r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we - // already have the correct result. The exact halfway case cannot occur. - // We also take this time to right shift quotient if it falls in the [1,2) - // range and adjust the exponent accordingly. - rep_t residual; - if (quotient < (implicitBit << 1)) { - residual = (aSignificand << 53) - quotient * bSignificand; - quotientExponent--; - } else { - quotient >>= 1; - residual = (aSignificand << 52) - quotient * bSignificand; - } - - const int writtenExponent = quotientExponent + exponentBias; - if (writtenExponent >= maxExponent) { - // If we have overflowed the exponent, return infinity. - return fromRep(infRep | quotientSign); - } +#define NUMBER_OF_HALF_ITERATIONS 3 +#define NUMBER_OF_FULL_ITERATIONS 1 - else if (writtenExponent < 1) { - if (writtenExponent == 0) { - // Check whether the rounded result is normal. - const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit. - rep_t absResult = quotient & significandMask; - // Round. - absResult += round; - if (absResult & ~significandMask) { - // The rounded result is normal; return it. - return fromRep(absResult | quotientSign); - } - } - // Flush denormals to zero. In the future, it would be nice to add - // code to round them correctly. 
- return fromRep(quotientSign); - } +#include "fp_div_impl.inc" - else { - const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit. - rep_t absResult = quotient & significandMask; - // Insert the exponent. - absResult |= (rep_t)writtenExponent << significandBits; - // Round. - absResult += round; - // Insert the sign and return. - const double result = fromRep(absResult | quotientSign); - return result; - } -} +COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { return __divXf3__(a, b); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET)
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits