optimizing totalorder

Bruno Haible Sun, 15 Oct 2023 07:59:35 -0700

With the new benchmark in place, I measured the run time of
  - the glibc 2.35 implementation of totalorder,
  - the gnulib implementation (picked by configuring with
      gl_cv_func_totalorder_in_libm=no gl_cv_func_totalorder_no_libm=no \
      gl_cv_func_totalorderf_in_libm=no gl_cv_func_totalorderf_no_libm=no \
      gl_cv_func_totalorderl_in_libm=no gl_cv_func_totalorderl_no_libm=no),
  - the gnulib implementation with some of its NaN tests disabled.
    This change (see attached patch) is correct: it still passes the unit
    tests.


Here are the running times (on x86_64) of "./bench-totalorder fdl 1000000":

                     f       d       l

glibc              1.816   1.671   2.078
gnulib             1.445   1.425   8.690
gnulib with patch  1.798   1.974  14.032

Conclusion:
  * My patch is a slowdown. It apparently "optimized" the fast path away. :-D
  * The gnulib implementation is significantly faster than glibc, except for
    the long-double case. I'll redo the measurements on various CPU types and
    then tell the glibc people...

Kudos to you, Paul, for an implementation that is not only standards-compliant
and portable, but also faster than glibc!

Bruno

diff --git a/lib/totalorder.c b/lib/totalorder.c
index ee90ebb1f9..212e549305 100644
--- a/lib/totalorder.c
+++ b/lib/totalorder.c
@@ -21,6 +21,8 @@
 /* Specification.  */
 #include <math.h>
 
+#include <float.h>
+
 int
 totalorder (double const *x, double const *y)
 {
@@ -31,19 +33,27 @@ totalorder (double const *x, double const *y)
   if (!xs != !ys)
     return xs;
 
-  /* If one of *X, *Y is a NaN and the other isn't, the answer is easy
-     as well: the negative NaN is "smaller", the positive NaN is "greater"
-     than the other argument.  */
-  int xn = isnand (*x);
-  int yn = isnand (*y);
-  if (!xn != !yn)
-    return !xn == !xs;
-  /* If none of *X, *Y is a NaN, the '<=' operator does the job, including
-     for -Infinity and +Infinity.  */
-  if (!xn)
-    return *x <= *y;
-
-  /* At this point, *X and *Y are NaNs with the same sign bit.  */
+#if !(defined __hppa || defined __mips__ || defined __sh__)
+  /* If the 'double' type is as described by IEEE 754:2008 = ISO/IEC 60559:2011,
+     we can skip the 'isnand' tests.
+     Note that while gcc 13 defines __DBL_IS_IEC_60559__, clang 16 doesn't.  */
+  if (!(DBL_MAX_EXP == 1024 && DBL_MIN_EXP == -1021))
+#endif
+    {
+      /* If one of *X, *Y is a NaN and the other isn't, the answer is easy
+         as well: the negative NaN is "smaller", the positive NaN is "greater"
+         than the other argument.  */
+      int xn = isnand (*x);
+      int yn = isnand (*y);
+      if (!xn != !yn)
+        return !xn == !xs;
+      /* If none of *X, *Y is a NaN, the '<=' operator does the job, including
+         for -Infinity and +Infinity.  */
+      if (!xn)
+        return *x <= *y;
+
+      /* At this point, *X and *Y are NaNs with the same sign bit.  */
+    }
 
   unsigned long long extended_sign = -!!xs;
 #if defined __hppa || defined __mips__ || defined __sh__
diff --git a/lib/totalorderf.c b/lib/totalorderf.c
index fa2a1245de..c1137d5150 100644
--- a/lib/totalorderf.c
+++ b/lib/totalorderf.c
@@ -21,6 +21,8 @@
 /* Specification.  */
 #include <math.h>
 
+#include <float.h>
+
 int
 totalorderf (float const *x, float const *y)
 {
@@ -31,19 +33,27 @@ totalorderf (float const *x, float const *y)
   if (!xs != !ys)
     return xs;
 
-  /* If one of *X, *Y is a NaN and the other isn't, the answer is easy
-     as well: the negative NaN is "smaller", the positive NaN is "greater"
-     than the other argument.  */
-  int xn = isnanf (*x);
-  int yn = isnanf (*y);
-  if (!xn != !yn)
-    return !xn == !xs;
-  /* If none of *X, *Y is a NaN, the '<=' operator does the job, including
-     for -Infinity and +Infinity.  */
-  if (!xn)
-    return *x <= *y;
-
-  /* At this point, *X and *Y are NaNs with the same sign bit.  */
+#if !(defined __hppa || defined __mips__ || defined __sh__)
+  /* If the 'float' type is as described by IEEE 754:2008 = ISO/IEC 60559:2011,
+     we can skip the 'isnanf' tests.
+     Note that while gcc 13 defines __FLT_IS_IEC_60559__, clang 16 doesn't.  */
+  if (!(FLT_MAX_EXP == 128 && FLT_MIN_EXP == -125))
+#endif
+    {
+      /* If one of *X, *Y is a NaN and the other isn't, the answer is easy
+         as well: the negative NaN is "smaller", the positive NaN is "greater"
+         than the other argument.  */
+      int xn = isnanf (*x);
+      int yn = isnanf (*y);
+      if (!xn != !yn)
+        return !xn == !xs;
+      /* If none of *X, *Y is a NaN, the '<=' operator does the job, including
+         for -Infinity and +Infinity.  */
+      if (!xn)
+        return *x <= *y;
+
+      /* At this point, *X and *Y are NaNs with the same sign bit.  */
+    }
 
   unsigned int extended_sign = -!!xs;
 #if defined __hppa || defined __mips__ || defined __sh__
diff --git a/lib/totalorderl.c b/lib/totalorderl.c
index a06e70d55f..e00ca3c68a 100644
--- a/lib/totalorderl.c
+++ b/lib/totalorderl.c
@@ -37,19 +37,27 @@ totalorderl (long double const *x, long double const *y)
   if (!xs != !ys)
     return xs;
 
-  /* If one of *X, *Y is a NaN and the other isn't, the answer is easy
-     as well: the negative NaN is "smaller", the positive NaN is "greater"
-     than the other argument.  */
-  int xn = isnanl (*x);
-  int yn = isnanl (*y);
-  if (!xn != !yn)
-    return !xn == !xs;
-  /* If none of *X, *Y is a NaN, the '<=' operator does the job, including
-     for -Infinity and +Infinity.  */
-  if (!xn)
-    return *x <= *y;
-
-  /* At this point, *X and *Y are NaNs with the same sign bit.  */
+#if !(defined __hppa || defined __mips__ || defined __sh__)
+  /* If the 'long double' type is as described by IEEE 754:2008 =
+     ISO/IEC 60559:2011, we can skip the 'isnanl' tests.
+     Note that while gcc 13 defines __LDBL_IS_IEC_60559__, clang 16 doesn't.  */
+  if (!(LDBL_MAX_EXP == 16384 && LDBL_MIN_EXP == -16381))
+#endif
+    {
+      /* If one of *X, *Y is a NaN and the other isn't, the answer is easy
+         as well: the negative NaN is "smaller", the positive NaN is "greater"
+         than the other argument.  */
+      int xn = isnanl (*x);
+      int yn = isnanl (*y);
+      if (!xn != !yn)
+        return !xn == !xs;
+      /* If none of *X, *Y is a NaN, the '<=' operator does the job, including
+         for -Infinity and +Infinity.  */
+      if (!xn)
+        return *x <= *y;
+
+      /* At this point, *X and *Y are NaNs with the same sign bit.  */
+    }
 
   unsigned long long extended_sign = -!!xs;
 
diff --git a/modules/totalorder b/modules/totalorder
index 00f190c42e..5d04d02fd4 100644
--- a/modules/totalorder
+++ b/modules/totalorder
@@ -9,6 +9,7 @@ m4/totalorder.m4
 Depends-on:
 math
 extensions
+float           [test $HAVE_TOTALORDER = 0 || test $REPLACE_TOTALORDER = 1]
 isnand          [test $HAVE_TOTALORDER = 0 || test $REPLACE_TOTALORDER = 1]
 signbit         [test $HAVE_TOTALORDER = 0 || test $REPLACE_TOTALORDER = 1]
 
diff --git a/modules/totalorderf b/modules/totalorderf
index ab1569c9cc..07c881c9a4 100644
--- a/modules/totalorderf
+++ b/modules/totalorderf
@@ -9,6 +9,7 @@ m4/totalorder.m4
 Depends-on:
 math
 extensions
+float           [test $HAVE_TOTALORDERF = 0 || test $REPLACE_TOTALORDERF = 1]
 isnanf          [test $HAVE_TOTALORDERF = 0 || test $REPLACE_TOTALORDERF = 1]
 signbit         [test $HAVE_TOTALORDERF = 0 || test $REPLACE_TOTALORDERF = 1]

optimizing totalorder

Reply via email to