hfinkel created this revision.
hfinkel added reviewers: mclow.lists, EricWF, chandlerc.
hfinkel added a subscriber: cfe-commits.
Herald added a subscriber: mcrosier.

The libc-provided isnan/isinf/isfinite macro implementations are specifically 
designed to function correctly, even in the presence of -ffast-math (or, more 
specifically, -ffinite-math-only). As such, on most implementations, these 
either always turn into external function calls (e.g. glibc) or are 
specifically function calls when __FINITE_MATH_ONLY__ is defined (e.g. Darwin).

Our implementation of complex arithmetic makes heavy use of isnan/isinf/isfinite 
to deal with corner cases involving non-finite quantities. This is problematic 
in two respects:

 1. On systems where these are always function calls (e.g. Linux/glibc), there 
is a performance penalty.
 2. When compiling with -ffast-math, there is a significant performance penalty 
(on Darwin and systems with similar implementations, the code may in fact be 
slower than when not using -ffast-math, because the inline definitions 
provided by libc become unavailable to prevent the checks from being optimized 
out).

Eliding these inf/nan checks in -ffast-math mode is consistent with what 
happens with libstdc++, and in my experience, what users expect. This is 
critical to getting high-performance code when using complex<T>. This patch 
replaces uses of those functions on basic floating-point types with calls to 
__builtin_isnan/isinf/isfinite, which Clang will always expand inline. When 
using -ffast-math (or -ffinite-math-only), the optimizer will remove the 
checks as expected.

http://reviews.llvm.org/D18639

Files:
  include/cmath
  include/complex

Index: include/complex
===================================================================
--- include/complex
+++ include/complex
@@ -596,39 +596,39 @@
     _Tp __bc = __b * __c;
     _Tp __x = __ac - __bd;
     _Tp __y = __ad + __bc;
-    if (isnan(__x) && isnan(__y))
+    if (__fast_isnan(__x) && __fast_isnan(__y))
     {
         bool __recalc = false;
-        if (isinf(__a) || isinf(__b))
+        if (__fast_isinf(__a) || __fast_isinf(__b))
         {
-            __a = copysign(isinf(__a) ? _Tp(1) : _Tp(0), __a);
-            __b = copysign(isinf(__b) ? _Tp(1) : _Tp(0), __b);
-            if (isnan(__c))
+            __a = copysign(__fast_isinf(__a) ? _Tp(1) : _Tp(0), __a);
+            __b = copysign(__fast_isinf(__b) ? _Tp(1) : _Tp(0), __b);
+            if (__fast_isnan(__c))
                 __c = copysign(_Tp(0), __c);
-            if (isnan(__d))
+            if (__fast_isnan(__d))
                 __d = copysign(_Tp(0), __d);
             __recalc = true;
         }
-        if (isinf(__c) || isinf(__d))
+        if (__fast_isinf(__c) || __fast_isinf(__d))
         {
-            __c = copysign(isinf(__c) ? _Tp(1) : _Tp(0), __c);
-            __d = copysign(isinf(__d) ? _Tp(1) : _Tp(0), __d);
-            if (isnan(__a))
+            __c = copysign(__fast_isinf(__c) ? _Tp(1) : _Tp(0), __c);
+            __d = copysign(__fast_isinf(__d) ? _Tp(1) : _Tp(0), __d);
+            if (__fast_isnan(__a))
                 __a = copysign(_Tp(0), __a);
-            if (isnan(__b))
+            if (__fast_isnan(__b))
                 __b = copysign(_Tp(0), __b);
             __recalc = true;
         }
-        if (!__recalc && (isinf(__ac) || isinf(__bd) ||
-                          isinf(__ad) || isinf(__bc)))
+        if (!__recalc && (__fast_isinf(__ac) || __fast_isinf(__bd) ||
+                          __fast_isinf(__ad) || __fast_isinf(__bc)))
         {
-            if (isnan(__a))
+            if (__fast_isnan(__a))
                 __a = copysign(_Tp(0), __a);
-            if (isnan(__b))
+            if (__fast_isnan(__b))
                 __b = copysign(_Tp(0), __b);
-            if (isnan(__c))
+            if (__fast_isnan(__c))
                 __c = copysign(_Tp(0), __c);
-            if (isnan(__d))
+            if (__fast_isnan(__d))
                 __d = copysign(_Tp(0), __d);
             __recalc = true;
         }
@@ -671,33 +671,33 @@
     _Tp __c = __w.real();
     _Tp __d = __w.imag();
     _Tp __logbw = logb(fmax(fabs(__c), fabs(__d)));
-    if (isfinite(__logbw))
+    if (__fast_isfinite(__logbw))
     {
         __ilogbw = static_cast<int>(__logbw);
         __c = scalbn(__c, -__ilogbw);
         __d = scalbn(__d, -__ilogbw);
     }
     _Tp __denom = __c * __c + __d * __d;
     _Tp __x = scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
     _Tp __y = scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
-    if (isnan(__x) && isnan(__y))
+    if (__fast_isnan(__x) && __fast_isnan(__y))
     {
-        if ((__denom == _Tp(0)) && (!isnan(__a) || !isnan(__b)))
+        if ((__denom == _Tp(0)) && (!__fast_isnan(__a) || !__fast_isnan(__b)))
         {
             __x = copysign(_Tp(INFINITY), __c) * __a;
             __y = copysign(_Tp(INFINITY), __c) * __b;
         }
-        else if ((isinf(__a) || isinf(__b)) && isfinite(__c) && isfinite(__d))
+        else if ((__fast_isinf(__a) || __fast_isinf(__b)) && __fast_isfinite(__c) && __fast_isfinite(__d))
         {
-            __a = copysign(isinf(__a) ? _Tp(1) : _Tp(0), __a);
-            __b = copysign(isinf(__b) ? _Tp(1) : _Tp(0), __b);
+            __a = copysign(__fast_isinf(__a) ? _Tp(1) : _Tp(0), __a);
+            __b = copysign(__fast_isinf(__b) ? _Tp(1) : _Tp(0), __b);
             __x = _Tp(INFINITY) * (__a * __c + __b * __d);
             __y = _Tp(INFINITY) * (__b * __c - __a * __d);
         }
-        else if (isinf(__logbw) && __logbw > _Tp(0) && isfinite(__a) && isfinite(__b))
+        else if (__fast_isinf(__logbw) && __logbw > _Tp(0) && __fast_isfinite(__a) && __fast_isfinite(__b))
         {
-            __c = copysign(isinf(__c) ? _Tp(1) : _Tp(0), __c);
-            __d = copysign(isinf(__d) ? _Tp(1) : _Tp(0), __d);
+            __c = copysign(__fast_isinf(__c) ? _Tp(1) : _Tp(0), __c);
+            __d = copysign(__fast_isinf(__d) ? _Tp(1) : _Tp(0), __d);
             __x = _Tp(0) * (__a * __c + __b * __d);
             __y = _Tp(0) * (__b * __c - __a * __d);
         }
@@ -935,9 +935,9 @@
 _Tp
 norm(const complex<_Tp>& __c)
 {
-    if (isinf(__c.real()))
+    if (__fast_isinf(__c.real()))
         return abs(__c.real());
-    if (isinf(__c.imag()))
+    if (__fast_isinf(__c.imag()))
         return abs(__c.imag());
     return __c.real() * __c.real() + __c.imag() * __c.imag();
 }
@@ -1026,25 +1026,25 @@
 proj(const complex<_Tp>& __c)
 {
     std::complex<_Tp> __r = __c;
-    if (isinf(__c.real()) || isinf(__c.imag()))
+    if (__fast_isinf(__c.real()) || __fast_isinf(__c.imag()))
         __r = complex<_Tp>(INFINITY, copysign(_Tp(0), __c.imag()));
     return __r;
 }
 
 inline _LIBCPP_INLINE_VISIBILITY
 complex<long double>
 proj(long double __re)
 {
-    if (isinf(__re))
+    if (__fast_isinf(__re))
         __re = abs(__re);
     return complex<long double>(__re);
 }
 
 inline _LIBCPP_INLINE_VISIBILITY
 complex<double>
 proj(double __re)
 {
-    if (isinf(__re))
+    if (__fast_isinf(__re))
         __re = abs(__re);
     return complex<double>(__re);
 }
@@ -1065,7 +1065,7 @@
 complex<float>
 proj(float __re)
 {
-    if (isinf(__re))
+    if (__fast_isinf(__re))
         __re = abs(__re);
     return complex<float>(__re);
 }
@@ -1076,25 +1076,25 @@
 complex<_Tp>
 polar(const _Tp& __rho, const _Tp& __theta = _Tp(0))
 {
-    if (isnan(__rho) || signbit(__rho))
+    if (__fast_isnan(__rho) || signbit(__rho))
         return complex<_Tp>(_Tp(NAN), _Tp(NAN));
-    if (isnan(__theta))
+    if (__fast_isnan(__theta))
     {
-        if (isinf(__rho))
+        if (__fast_isinf(__rho))
             return complex<_Tp>(__rho, __theta);
         return complex<_Tp>(__theta, __theta);
     }
-    if (isinf(__theta))
+    if (__fast_isinf(__theta))
     {
-        if (isinf(__rho))
+        if (__fast_isinf(__rho))
             return complex<_Tp>(__rho, _Tp(NAN));
         return complex<_Tp>(_Tp(NAN), _Tp(NAN));
     }
     _Tp __x = __rho * cos(__theta);
-    if (isnan(__x))
+    if (__fast_isnan(__x))
         __x = 0;
     _Tp __y = __rho * sin(__theta);
-    if (isnan(__y))
+    if (__fast_isnan(__y))
         __y = 0;
     return complex<_Tp>(__x, __y);
 }
@@ -1125,13 +1125,13 @@
 complex<_Tp>
 sqrt(const complex<_Tp>& __x)
 {
-    if (isinf(__x.imag()))
+    if (__fast_isinf(__x.imag()))
         return complex<_Tp>(_Tp(INFINITY), __x.imag());
-    if (isinf(__x.real()))
+    if (__fast_isinf(__x.real()))
     {
         if (__x.real() > _Tp(0))
-            return complex<_Tp>(__x.real(), isnan(__x.imag()) ? __x.imag() : copysign(_Tp(0), __x.imag()));
-        return complex<_Tp>(isnan(__x.imag()) ? __x.imag() : _Tp(0), copysign(__x.real(), __x.imag()));
+            return complex<_Tp>(__x.real(), __fast_isnan(__x.imag()) ? __x.imag() : copysign(_Tp(0), __x.imag()));
+        return complex<_Tp>(__fast_isnan(__x.imag()) ? __x.imag() : _Tp(0), copysign(__x.real(), __x.imag()));
     }
     return polar(sqrt(abs(__x)), arg(__x) / _Tp(2));
 }
@@ -1143,21 +1143,21 @@
 exp(const complex<_Tp>& __x)
 {
     _Tp __i = __x.imag();
-    if (isinf(__x.real()))
+    if (__fast_isinf(__x.real()))
     {
         if (__x.real() < _Tp(0))
         {
-            if (!isfinite(__i))
+            if (!__fast_isfinite(__i))
                 __i = _Tp(1);
         }
-        else if (__i == 0 || !isfinite(__i))
+        else if (__i == 0 || !__fast_isfinite(__i))
         {
-            if (isinf(__i))
+            if (__fast_isinf(__i))
                 __i = _Tp(NAN);
             return complex<_Tp>(__x.real(), __i);
         }
     }
-    else if (isnan(__x.real()) && __x.imag() == 0)
+    else if (__fast_isnan(__x.real()) && __x.imag() == 0)
         return __x;
     _Tp __e = exp(__x.real());
     return complex<_Tp>(__e * cos(__i), __e * sin(__i));
@@ -1215,23 +1215,23 @@
 asinh(const complex<_Tp>& __x)
 {
     const _Tp __pi(atan2(+0., -0.));
-    if (isinf(__x.real()))
+    if (__fast_isinf(__x.real()))
     {
-        if (isnan(__x.imag()))
+        if (__fast_isnan(__x.imag()))
             return __x;
-        if (isinf(__x.imag()))
+        if (__fast_isinf(__x.imag()))
             return complex<_Tp>(__x.real(), copysign(__pi * _Tp(0.25), __x.imag()));
         return complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
     }
-    if (isnan(__x.real()))
+    if (__fast_isnan(__x.real()))
     {
-        if (isinf(__x.imag()))
+        if (__fast_isinf(__x.imag()))
             return complex<_Tp>(__x.imag(), __x.real());
         if (__x.imag() == 0)
             return __x;
         return complex<_Tp>(__x.real(), __x.real());
     }
-    if (isinf(__x.imag()))
+    if (__fast_isinf(__x.imag()))
         return complex<_Tp>(copysign(__x.imag(), __x.real()), copysign(__pi/_Tp(2), __x.imag()));
     complex<_Tp> __z = log(__x + sqrt(pow(__x, _Tp(2)) + _Tp(1)));
     return complex<_Tp>(copysign(__z.real(), __x.real()), copysign(__z.imag(), __x.imag()));
@@ -1244,11 +1244,11 @@
 acosh(const complex<_Tp>& __x)
 {
     const _Tp __pi(atan2(+0., -0.));
-    if (isinf(__x.real()))
+    if (__fast_isinf(__x.real()))
     {
-        if (isnan(__x.imag()))
+        if (__fast_isnan(__x.imag()))
             return complex<_Tp>(abs(__x.real()), __x.imag());
-        if (isinf(__x.imag()))
+        if (__fast_isinf(__x.imag()))
         {
             if (__x.real() > 0)
                 return complex<_Tp>(__x.real(), copysign(__pi * _Tp(0.25), __x.imag()));
@@ -1259,13 +1259,13 @@
             return complex<_Tp>(-__x.real(), copysign(__pi, __x.imag()));
         return complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag()));
     }
-    if (isnan(__x.real()))
+    if (__fast_isnan(__x.real()))
     {
-        if (isinf(__x.imag()))
+        if (__fast_isinf(__x.imag()))
             return complex<_Tp>(abs(__x.imag()), __x.real());
         return complex<_Tp>(__x.real(), __x.real());
     }
-    if (isinf(__x.imag()))
+    if (__fast_isinf(__x.imag()))
         return complex<_Tp>(abs(__x.imag()), copysign(__pi/_Tp(2), __x.imag()));
     complex<_Tp> __z = log(__x + sqrt(pow(__x, _Tp(2)) - _Tp(1)));
     return complex<_Tp>(copysign(__z.real(), _Tp(0)), copysign(__z.imag(), __x.imag()));
@@ -1278,21 +1278,21 @@
 atanh(const complex<_Tp>& __x)
 {
     const _Tp __pi(atan2(+0., -0.));
-    if (isinf(__x.imag()))
+    if (__fast_isinf(__x.imag()))
     {
         return complex<_Tp>(copysign(_Tp(0), __x.real()), copysign(__pi/_Tp(2), __x.imag()));
     }
-    if (isnan(__x.imag()))
+    if (__fast_isnan(__x.imag()))
     {
-        if (isinf(__x.real()) || __x.real() == 0)
+        if (__fast_isinf(__x.real()) || __x.real() == 0)
             return complex<_Tp>(copysign(_Tp(0), __x.real()), __x.imag());
         return complex<_Tp>(__x.imag(), __x.imag());
     }
-    if (isnan(__x.real()))
+    if (__fast_isnan(__x.real()))
     {
         return complex<_Tp>(__x.real(), __x.real());
     }
-    if (isinf(__x.real()))
+    if (__fast_isinf(__x.real()))
     {
         return complex<_Tp>(copysign(_Tp(0), __x.real()), copysign(__pi/_Tp(2), __x.imag()));
     }
@@ -1310,11 +1310,11 @@
 complex<_Tp>
 sinh(const complex<_Tp>& __x)
 {
-    if (isinf(__x.real()) && !isfinite(__x.imag()))
+    if (__fast_isinf(__x.real()) && !__fast_isfinite(__x.imag()))
         return complex<_Tp>(__x.real(), _Tp(NAN));
-    if (__x.real() == 0 && !isfinite(__x.imag()))
+    if (__x.real() == 0 && !__fast_isfinite(__x.imag()))
         return complex<_Tp>(__x.real(), _Tp(NAN));
-    if (__x.imag() == 0 && !isfinite(__x.real()))
+    if (__x.imag() == 0 && !__fast_isfinite(__x.real()))
         return __x;
     return complex<_Tp>(sinh(__x.real()) * cos(__x.imag()), cosh(__x.real()) * sin(__x.imag()));
 }
@@ -1325,13 +1325,13 @@
 complex<_Tp>
 cosh(const complex<_Tp>& __x)
 {
-    if (isinf(__x.real()) && !isfinite(__x.imag()))
+    if (__fast_isinf(__x.real()) && !__fast_isfinite(__x.imag()))
         return complex<_Tp>(abs(__x.real()), _Tp(NAN));
-    if (__x.real() == 0 && !isfinite(__x.imag()))
+    if (__x.real() == 0 && !__fast_isfinite(__x.imag()))
         return complex<_Tp>(_Tp(NAN), __x.real());
     if (__x.real() == 0 && __x.imag() == 0)
         return complex<_Tp>(_Tp(1), __x.imag());
-    if (__x.imag() == 0 && !isfinite(__x.real()))
+    if (__x.imag() == 0 && !__fast_isfinite(__x.real()))
         return complex<_Tp>(abs(__x.real()), __x.imag());
     return complex<_Tp>(cosh(__x.real()) * cos(__x.imag()), sinh(__x.real()) * sin(__x.imag()));
 }
@@ -1342,19 +1342,19 @@
 complex<_Tp>
 tanh(const complex<_Tp>& __x)
 {
-    if (isinf(__x.real()))
+    if (__fast_isinf(__x.real()))
     {
-        if (!isfinite(__x.imag()))
+        if (!__fast_isfinite(__x.imag()))
             return complex<_Tp>(_Tp(1), _Tp(0));
         return complex<_Tp>(_Tp(1), copysign(_Tp(0), sin(_Tp(2) * __x.imag())));
     }
-    if (isnan(__x.real()) && __x.imag() == 0)
+    if (__fast_isnan(__x.real()) && __x.imag() == 0)
         return __x;
     _Tp __2r(_Tp(2) * __x.real());
     _Tp __2i(_Tp(2) * __x.imag());
     _Tp __d(cosh(__2r) + cos(__2i));
     _Tp __2rsh(sinh(__2r));
-    if (isinf(__2rsh) && isinf(__d))
+    if (__fast_isinf(__2rsh) && __fast_isinf(__d))
         return complex<_Tp>(__2rsh > _Tp(0) ? _Tp(1) : _Tp(-1),
                             __2i > _Tp(0) ? _Tp(0) : _Tp(-0.));
     return  complex<_Tp>(__2rsh/__d, sin(__2i)/__d);
@@ -1377,11 +1377,11 @@
 acos(const complex<_Tp>& __x)
 {
     const _Tp __pi(atan2(+0., -0.));
-    if (isinf(__x.real()))
+    if (__fast_isinf(__x.real()))
     {
-        if (isnan(__x.imag()))
+        if (__fast_isnan(__x.imag()))
             return complex<_Tp>(__x.imag(), __x.real());
-        if (isinf(__x.imag()))
+        if (__fast_isinf(__x.imag()))
         {
             if (__x.real() < _Tp(0))
                 return complex<_Tp>(_Tp(0.75) * __pi, -__x.imag());
@@ -1391,13 +1391,13 @@
             return complex<_Tp>(__pi, signbit(__x.imag()) ? -__x.real() : __x.real());
         return complex<_Tp>(_Tp(0), signbit(__x.imag()) ? __x.real() : -__x.real());
     }
-    if (isnan(__x.real()))
+    if (__fast_isnan(__x.real()))
     {
-        if (isinf(__x.imag()))
+        if (__fast_isinf(__x.imag()))
             return complex<_Tp>(__x.real(), -__x.imag());
         return complex<_Tp>(__x.real(), __x.real());
     }
-    if (isinf(__x.imag()))
+    if (__fast_isinf(__x.imag()))
         return complex<_Tp>(__pi/_Tp(2), -__x.imag());
     if (__x.real() == 0)
         return complex<_Tp>(__pi/_Tp(2), -__x.imag());
Index: include/cmath
===================================================================
--- include/cmath
+++ include/cmath
@@ -548,6 +548,90 @@
 using ::lgammaf;
 #endif // __sun__
 
+#if __has_builtin(__builtin_isnan)
+template <class _A1>
+_LIBCPP_ALWAYS_INLINE
+typename std::enable_if<std::is_floating_point<_A1>::value, bool>::type
+__fast_isnan(_A1 __lcpp_x) _NOEXCEPT
+{
+    return __builtin_isnan(__lcpp_x);
+}
+
+template <class _A1>
+_LIBCPP_ALWAYS_INLINE
+typename std::enable_if<!std::is_floating_point<_A1>::value, bool>::type
+__fast_isnan(_A1 __lcpp_x) _NOEXCEPT
+{
+    return isnan(__lcpp_x);
+}
+
+#else
+
+template <class _A1>
+_LIBCPP_ALWAYS_INLINE
+bool
+__fast_isnan(_A1 __lcpp_x) _NOEXCEPT
+{
+  return isnan(__lcpp_x);
+}
+#endif
+
+#if __has_builtin(__builtin_isinf)
+template <class _A1>
+_LIBCPP_ALWAYS_INLINE
+typename std::enable_if<std::is_floating_point<_A1>::value, bool>::type
+__fast_isinf(_A1 __lcpp_x) _NOEXCEPT
+{
+    return __builtin_isinf(__lcpp_x);
+}
+
+template <class _A1>
+_LIBCPP_ALWAYS_INLINE
+typename std::enable_if<!std::is_floating_point<_A1>::value, bool>::type
+__fast_isinf(_A1 __lcpp_x) _NOEXCEPT
+{
+    return isinf(__lcpp_x);
+}
+
+#else
+
+template <class _A1>
+_LIBCPP_ALWAYS_INLINE
+bool
+__fast_isinf(_A1 __lcpp_x) _NOEXCEPT
+{
+  return isinf(__lcpp_x);
+}
+#endif
+
+#if __has_builtin(__builtin_isfinite)
+template <class _A1>
+_LIBCPP_ALWAYS_INLINE
+typename std::enable_if<std::is_floating_point<_A1>::value, bool>::type
+__fast_isfinite(_A1 __lcpp_x) _NOEXCEPT
+{
+    return __builtin_isfinite(__lcpp_x);
+}
+
+template <class _A1>
+_LIBCPP_ALWAYS_INLINE
+typename std::enable_if<!std::is_floating_point<_A1>::value, bool>::type
+__fast_isfinite(_A1 __lcpp_x) _NOEXCEPT
+{
+    return isfinite(__lcpp_x);
+}
+
+#else
+
+template <class _A1>
+_LIBCPP_ALWAYS_INLINE
+bool
+__fast_isfinite(_A1 __lcpp_x) _NOEXCEPT
+{
+  return isfinite(__lcpp_x);
+}
+#endif
+
 _LIBCPP_END_NAMESPACE_STD
 
 #endif  // _LIBCPP_CMATH
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to