--- mingw-w64-crt/Makefile.am | 4 ++-- mingw-w64-crt/math/fma.S | 42 -------------------------------- mingw-w64-crt/math/fma.c | 12 ++++++++++ mingw-w64-crt/math/fmaf.S | 43 --------------------------------- mingw-w64-crt/math/fmaf.c | 10 ++++++++ mingw-w64-crt/math/fmal.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 84 insertions(+), 88 deletions(-) delete mode 100644 mingw-w64-crt/math/fma.S create mode 100644 mingw-w64-crt/math/fma.c delete mode 100644 mingw-w64-crt/math/fmaf.S create mode 100644 mingw-w64-crt/math/fmaf.c
diff --git a/mingw-w64-crt/Makefile.am b/mingw-w64-crt/Makefile.am index 886fcf0..1c6e534 100644 --- a/mingw-w64-crt/Makefile.am +++ b/mingw-w64-crt/Makefile.am @@ -244,7 +244,6 @@ src_libmingwex=\ \ math/_chgsignl.S math/ceil.S math/ceilf.S math/ceill.S math/copysignl.S \ math/floor.S math/floorf.S math/floorl.S \ - math/fma.S math/fmaf.S \ math/nearbyint.S math/nearbyintf.S math/nearbyintl.S \ math/trunc.S math/truncf.S \ math/cbrt.c \ @@ -252,7 +251,8 @@ src_libmingwex=\ math/coshf.c math/coshl.c math/erfl.c \ math/expf.c \ math/fabs.c math/fabsf.c math/fabsl.c math/fdim.c math/fdimf.c math/fdiml.c \ - math/fmal.c math/fmax.c math/fmaxf.c math/fmaxl.c math/fmin.c math/fminf.c \ + math/fma.c math/fmaf.c math/fmal.c \ + math/fmax.c math/fmaxf.c math/fmaxl.c math/fmin.c math/fminf.c \ math/fminl.c math/fp_consts.c math/fp_constsf.c \ math/fp_constsl.c math/fpclassify.c math/fpclassifyf.c math/fpclassifyl.c math/frexpf.c \ math/hypotf.c math/hypot.c math/hypotl.c math/isnan.c math/isnanf.c math/isnanl.c \ diff --git a/mingw-w64-crt/math/fma.S b/mingw-w64-crt/math/fma.S deleted file mode 100644 index 74becde..0000000 --- a/mingw-w64-crt/math/fma.S +++ /dev/null @@ -1,42 +0,0 @@ -/** - * This file has no copyright assigned and is placed in the Public Domain. - * This file is part of the mingw-w64 runtime package. - * No warranty is given; refer to the file DISCLAIMER.PD within this package. - */ -#include <_mingw_mac.h> - - .file "fma.S" - .text -#ifdef __x86_64__ - .align 8 -#else - .align 4 -#endif - .p2align 4,,15 - .globl __MINGW_USYMBOL(fma) - .def __MINGW_USYMBOL(fma); .scl 2; .type 32; .endef -__MINGW_USYMBOL(fma): -#if defined(_AMD64_) || defined(__x86_64__) - subq $56, %rsp - movsd %xmm0,(%rsp) - movsd %xmm1,16(%rsp) - movsd %xmm2,32(%rsp) - fldl (%rsp) - fmull 16(%rsp) - fldl 32(%rsp) - faddp - fstpl (%rsp) - movsd (%rsp),%xmm0 - addq $56, %rsp - ret -#elif defined(_ARM_) || defined(__arm__) - fmacd d2, d0, d1 - fcpyd d0, d2 - bx lr -#elif defined(_X86_) || defined(__i386__) - fldl 4(%esp) - fmull 12(%esp) - fldl 20(%esp) - faddp - ret -#endif diff --git a/mingw-w64-crt/math/fma.c b/mingw-w64-crt/math/fma.c new file mode 100644 index 0000000..3703e00 --- /dev/null +++ b/mingw-w64-crt/math/fma.c @@ -0,0 +1,12 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the mingw-w64 runtime package. + * No warranty is given; refer to the file DISCLAIMER.PD within this package. + */ +long double fmal ( long double _x, long double _y, long double _z); + +double +fma ( double _x, double _y, double _z) +{ + return (double)fmal(_x, _y, _z); +} diff --git a/mingw-w64-crt/math/fmaf.S b/mingw-w64-crt/math/fmaf.S deleted file mode 100644 index 6bc7ef0..0000000 --- a/mingw-w64-crt/math/fmaf.S +++ /dev/null @@ -1,43 +0,0 @@ -/** - * This file has no copyright assigned and is placed in the Public Domain. - * This file is part of the mingw-w64 runtime package. - * No warranty is given; refer to the file DISCLAIMER.PD within this package. - */ -#include <_mingw_mac.h> - - .file "fmaf.S" - .text -#ifdef __x86_64__ - .align 8 -#else - .align 2 -#endif - .p2align 4,,15 - .globl __MINGW_USYMBOL(fmaf) - .def __MINGW_USYMBOL(fmaf); .scl 2; .type 32; .endef -__MINGW_USYMBOL(fmaf): -#if defined(_AMD64_) || defined(__x86_64__) - subq $56, %rsp - movss %xmm0,(%rsp) - movss %xmm1,16(%rsp) - movss %xmm2,32(%rsp) - flds (%rsp) - fmuls 16(%rsp) - flds 32(%rsp) - faddp - fstps (%rsp) - movss (%rsp),%xmm0 - addq $56, %rsp - ret -#elif defined(_ARM_) || defined(__arm__) - fmacs s2, s0, s1 - fcpys s0, s2 - bx lr -#elif defined(_X86_) || defined(__i386__) - flds 4(%esp) - fmuls 8(%esp) - flds 12(%esp) - faddp - ret -#endif - diff --git a/mingw-w64-crt/math/fmaf.c b/mingw-w64-crt/math/fmaf.c new file mode 100644 index 0000000..70a396e --- /dev/null +++ b/mingw-w64-crt/math/fmaf.c @@ -0,0 +1,10 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the mingw-w64 runtime package. + * No warranty is given; refer to the file DISCLAIMER.PD within this package. + */ +float +fmaf ( float _x, float _y, float _z) +{ + return (double)_x * (double)_y + (double)_z; +} diff --git a/mingw-w64-crt/math/fmal.c b/mingw-w64-crt/math/fmal.c index 3d0eb02..5cf21f8 100644 --- a/mingw-w64-crt/math/fmal.c +++ b/mingw-w64-crt/math/fmal.c @@ -3,10 +3,69 @@ * This file is part of the mingw-w64 runtime package. * No warranty is given; refer to the file DISCLAIMER.PD within this package. */ +#include <stdint.h> +#include <stdbool.h> + long double fmal ( long double _x, long double _y, long double _z); +// https://en.wikipedia.org/wiki/Extended_precision#x86_Extended_Precision_Format +typedef union x87reg_ { + long double f; + struct __attribute__((__packed__)) { + uint64_t f64 : 64; // fraction + uint16_t exp : 15; // exponent + uint16_t sgn : 1; // sign + }; + struct __attribute__((__packed__)) { + uint32_t flo : 32; + uint32_t fhi : 32; +// uint16_t exp : 15; +// uint16_t sgn : 1; + }; +} x87reg; + +static inline void break_down(x87reg *restrict lo, x87reg *restrict hi, long double x){ + hi->f = x; + const uint32_t flo = hi->flo; + const uint32_t exp = hi->exp; + const bool sgn = hi->sgn; + // Erase low-order significant bits. `hi->f` now has only 32 significant bits. + hi->flo = 0; + + if(flo == 0){ + // If the low-order significant bits are all zeroes, return zero in `lo->f`. + lo->f64 = 0; + lo->exp = 0; + } else { + // How many bits should we shift to normalize the floating point value? + // We assume `long` has 32 bits here. + const unsigned shn = (unsigned)__builtin_clzl(flo) + 32; + if(shn < exp){ + // `x` can be normalized! + lo->f64 = (uint64_t)flo << shn; // The MSB becomes one. + lo->exp = (exp - shn) & 0x7FFF; // Don'e forget to adjust the exponent. + } else { + // There are so few bits that `x` can't be normalized. Go with a denormal number. + lo->f64 = (uint64_t)flo << exp; // The MSB is zero. + lo->exp = 0; + } + } + lo->sgn = sgn; +} +static inline long double fpu_fma(long double x, long double y, long double z){ + x87reg xlo, xhi, ylo, yhi; + break_down(&xlo, &xhi, x); + break_down(&ylo, &yhi, y); + // The order of these four statements is essential. Don't move them around. + long double ret = z; + ret += xhi.f * yhi.f; // This is the most significant item. + ret += xhi.f * ylo.f + xlo.f * yhi.f; // They are equally significant. + ret += xlo.f * ylo.f; // This is the least significant item. + return ret; +} + long double fmal ( long double _x, long double _y, long double _z) { - return ((_x * _y) + _z); + return fpu_fma(_x, _y, _z); } -- 2.9.1
------------------------------------------------------------------------------
_______________________________________________ Mingw-w64-public mailing list Mingw-w64-public@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/mingw-w64-public