With the new benchmark in place, I measured the run time of
  - the glibc 2.35 implementation of totalorder,
  - the gnulib implementation (picked by configuring with
      gl_cv_func_totalorder_in_libm=no gl_cv_func_totalorder_no_libm=no \
      gl_cv_func_totalorderf_in_libm=no gl_cv_func_totalorderf_no_libm=no \
      gl_cv_func_totalorderl_in_libm=no gl_cv_func_totalorderl_no_libm=no),
  - the gnulib implementation with some disabled NaN tests. This change (see attached patch) is correct: it still passes the unit tests.
Here are the running times (on x86_64) of "./bench-totalorder fdl 1000000":

                          f        d        l
  glibc                 1.816    1.671    2.078
  gnulib                1.445    1.425    8.690
  gnulib with patch     1.798    1.974   14.032

Conclusion:
  * My patch is a slowdown. It apparently "optimized" the fast path away. :-D
  * The gnulib implementation is significantly faster than glibc, except for the long-double case.

I'll redo the measurements on various CPU types and then tell the glibc people...

Kudos to you, Paul, for an implementation that is not only standards-compliant and portable, but also faster than glibc!

Bruno
diff --git a/lib/totalorder.c b/lib/totalorder.c index ee90ebb1f9..212e549305 100644 --- a/lib/totalorder.c +++ b/lib/totalorder.c @@ -21,6 +21,8 @@ /* Specification. */ #include <math.h> +#include <float.h> + int totalorder (double const *x, double const *y) { @@ -31,19 +33,27 @@ totalorder (double const *x, double const *y) if (!xs != !ys) return xs; - /* If one of *X, *Y is a NaN and the other isn't, the answer is easy - as well: the negative NaN is "smaller", the positive NaN is "greater" - than the other argument. */ - int xn = isnand (*x); - int yn = isnand (*y); - if (!xn != !yn) - return !xn == !xs; - /* If none of *X, *Y is a NaN, the '<=' operator does the job, including - for -Infinity and +Infinity. */ - if (!xn) - return *x <= *y; - - /* At this point, *X and *Y are NaNs with the same sign bit. */ +#if !(defined __hppa || defined __mips__ || defined __sh__) + /* If the 'double' type is as described by IEEE 754:2008 = ISO/IEC 60559:2011, + we can skip the 'isnand' tests. + Note that while gcc 13 defines __DBL_IS_IEC_60559__, clang 16 doesn't. */ + if (!(DBL_MAX_EXP == 1024 && DBL_MIN_EXP == -1021)) +#endif + { + /* If one of *X, *Y is a NaN and the other isn't, the answer is easy + as well: the negative NaN is "smaller", the positive NaN is "greater" + than the other argument. */ + int xn = isnand (*x); + int yn = isnand (*y); + if (!xn != !yn) + return !xn == !xs; + /* If none of *X, *Y is a NaN, the '<=' operator does the job, including + for -Infinity and +Infinity. */ + if (!xn) + return *x <= *y; + + /* At this point, *X and *Y are NaNs with the same sign bit. */ + } unsigned long long extended_sign = -!!xs; #if defined __hppa || defined __mips__ || defined __sh__ diff --git a/lib/totalorderf.c b/lib/totalorderf.c index fa2a1245de..c1137d5150 100644 --- a/lib/totalorderf.c +++ b/lib/totalorderf.c @@ -21,6 +21,8 @@ /* Specification. 
*/ #include <math.h> +#include <float.h> + int totalorderf (float const *x, float const *y) { @@ -31,19 +33,27 @@ totalorderf (float const *x, float const *y) if (!xs != !ys) return xs; - /* If one of *X, *Y is a NaN and the other isn't, the answer is easy - as well: the negative NaN is "smaller", the positive NaN is "greater" - than the other argument. */ - int xn = isnanf (*x); - int yn = isnanf (*y); - if (!xn != !yn) - return !xn == !xs; - /* If none of *X, *Y is a NaN, the '<=' operator does the job, including - for -Infinity and +Infinity. */ - if (!xn) - return *x <= *y; - - /* At this point, *X and *Y are NaNs with the same sign bit. */ +#if !(defined __hppa || defined __mips__ || defined __sh__) + /* If the 'float' type is as described by IEEE 754:2008 = ISO/IEC 60559:2011, + we can skip the 'isnanf' tests. + Note that while gcc 13 defines __FLT_IS_IEC_60559__, clang 16 doesn't. */ + if (!(FLT_MAX_EXP == 128 && FLT_MIN_EXP == -125)) +#endif + { + /* If one of *X, *Y is a NaN and the other isn't, the answer is easy + as well: the negative NaN is "smaller", the positive NaN is "greater" + than the other argument. */ + int xn = isnanf (*x); + int yn = isnanf (*y); + if (!xn != !yn) + return !xn == !xs; + /* If none of *X, *Y is a NaN, the '<=' operator does the job, including + for -Infinity and +Infinity. */ + if (!xn) + return *x <= *y; + + /* At this point, *X and *Y are NaNs with the same sign bit. */ + } unsigned int extended_sign = -!!xs; #if defined __hppa || defined __mips__ || defined __sh__ diff --git a/lib/totalorderl.c b/lib/totalorderl.c index a06e70d55f..e00ca3c68a 100644 --- a/lib/totalorderl.c +++ b/lib/totalorderl.c @@ -37,19 +37,27 @@ totalorderl (long double const *x, long double const *y) if (!xs != !ys) return xs; - /* If one of *X, *Y is a NaN and the other isn't, the answer is easy - as well: the negative NaN is "smaller", the positive NaN is "greater" - than the other argument. 
*/ - int xn = isnanl (*x); - int yn = isnanl (*y); - if (!xn != !yn) - return !xn == !xs; - /* If none of *X, *Y is a NaN, the '<=' operator does the job, including - for -Infinity and +Infinity. */ - if (!xn) - return *x <= *y; - - /* At this point, *X and *Y are NaNs with the same sign bit. */ +#if !(defined __hppa || defined __mips__ || defined __sh__) + /* If the 'long double' type is as described by IEEE 754:2008 = + ISO/IEC 60559:2011, we can skip the 'isnanl' tests. + Note that while gcc 13 defines __LDBL_IS_IEC_60559__, clang 16 doesn't. */ + if (!(LDBL_MAX_EXP == 16384 && LDBL_MIN_EXP == -16381)) +#endif + { + /* If one of *X, *Y is a NaN and the other isn't, the answer is easy + as well: the negative NaN is "smaller", the positive NaN is "greater" + than the other argument. */ + int xn = isnanl (*x); + int yn = isnanl (*y); + if (!xn != !yn) + return !xn == !xs; + /* If none of *X, *Y is a NaN, the '<=' operator does the job, including + for -Infinity and +Infinity. */ + if (!xn) + return *x <= *y; + + /* At this point, *X and *Y are NaNs with the same sign bit. */ + } unsigned long long extended_sign = -!!xs; diff --git a/modules/totalorder b/modules/totalorder index 00f190c42e..5d04d02fd4 100644 --- a/modules/totalorder +++ b/modules/totalorder @@ -9,6 +9,7 @@ m4/totalorder.m4 Depends-on: math extensions +float [test $HAVE_TOTALORDER = 0 || test $REPLACE_TOTALORDER = 1] isnand [test $HAVE_TOTALORDER = 0 || test $REPLACE_TOTALORDER = 1] signbit [test $HAVE_TOTALORDER = 0 || test $REPLACE_TOTALORDER = 1] diff --git a/modules/totalorderf b/modules/totalorderf index ab1569c9cc..07c881c9a4 100644 --- a/modules/totalorderf +++ b/modules/totalorderf @@ -9,6 +9,7 @@ m4/totalorder.m4 Depends-on: math extensions +float [test $HAVE_TOTALORDERF = 0 || test $REPLACE_TOTALORDERF = 1] isnanf [test $HAVE_TOTALORDERF = 0 || test $REPLACE_TOTALORDERF = 1] signbit [test $HAVE_TOTALORDERF = 0 || test $REPLACE_TOTALORDERF = 1]