On Wednesday, 14 July 2021 14:42:01 CEST H.J. Lu wrote: > On Wed, Jul 14, 2021 at 12:32 AM Matthias Kretz <m.kr...@gsi.de> wrote: > > OK? > > > > On Wednesday, 30 June 2021 10:59:28 CEST Matthias Kretz wrote: > > > Library code, especially in headers, sometimes needs to know how the > > > compiler interprets / optimizes floating-point types and operations. > > > This information can be used for additional optimizations or for > > > ensuring correctness. This change makes -freciprocal-math, > > > -fno-signed-zeros, -fno-trapping-math, -fassociative-math, and > > > -frounding-math report their state via corresponding pre-defined macros. > > > > > > Signed-off-by: Matthias Kretz <m.kr...@gsi.de> > > > > > > gcc/testsuite/ChangeLog: > > > * gcc.dg/associative-math-1.c: New test. > > > * gcc.dg/associative-math-2.c: New test. > > > * gcc.dg/no-signed-zeros-1.c: New test. > > > * gcc.dg/no-signed-zeros-2.c: New test. > > > * gcc.dg/no-trapping-math-1.c: New test. > > > * gcc.dg/no-trapping-math-2.c: New test. > > > * gcc.dg/reciprocal-math-1.c: New test. > > > * gcc.dg/reciprocal-math-2.c: New test. > > > * gcc.dg/rounding-math-1.c: New test. > > > * gcc.dg/rounding-math-2.c: New test. > > > > > > gcc/c-family/ChangeLog: > > > * c-cppbuiltin.c (c_cpp_builtins_optimize_pragma): Define or > > > undefine __RECIPROCAL_MATH__, __NO_SIGNED_ZEROS__, > > > __NO_TRAPPING_MATH__, __ASSOCIATIVE_MATH__, and > > > __ROUNDING_MATH__ according to the new optimization flags. > > > > > > gcc/ChangeLog: > > > * cppbuiltin.c (define_builtin_macros_for_compilation_flags): > > > Define __RECIPROCAL_MATH__, __NO_SIGNED_ZEROS__, > > > __NO_TRAPPING_MATH__, __ASSOCIATIVE_MATH__, and > > > __ROUNDING_MATH__ according to their corresponding flags. > > > * doc/cpp.texi: Document __RECIPROCAL_MATH__, > > > __NO_SIGNED_ZEROS__, __NO_TRAPPING_MATH__, __ASSOCIATIVE_MATH__, > > > and __ROUNDING_MATH__. 
> > > > > Hi Hongtao, > > Can this be used to address > > https://gcc.gnu.org/pipermail/gcc/2021-July/236778.html
It should help to determine when a workaround is necessary. I use inline asm to implement the workaround. Relevant libstdc++ code (not upstream yet and not making use of __ASSOCIATIVE_MATH__ yet):

/*
 * Ensure the expressions leading up to the @p __x argument are evaluated at
 * least once.
 *
 * Example: __force_evaluation(x + y) - y will not optimize to x with
 * -fassociative-math.
 *
 * _TV is expected to be __vector_type_t<floating-point type, N>.
 *
 * How it works: during constant evaluation @p __x is returned unchanged.
 * Otherwise @p __x is passed through two empty asm statements, first as an
 * input-only operand and then as a read-write ("+") operand, and is returned
 * afterwards. The asm templates are empty, so the statements themselves emit
 * no instructions.
 * NOTE(review): presumably the "+" operand is what makes the compiler treat
 * the value as possibly modified, so that it cannot fold later arithmetic
 * with the expression that produced __x -- confirm with the author.
 *
 * The operand constraint depends on the target: "x" under __have_sse, "w"
 * under __have_neon, "v" or "fgr" under __have_power_vmx, "g" otherwise.
 * Presumably each one selects the register class that holds _TV on that
 * target; see the GCC machine-constraint documentation.
 *
 * @param __x  The value whose computation must not be optimized away.
 * @return     @p __x, unchanged in value.
 */
template <typename _TV>
  [[__gnu__::__flatten__, __gnu__::__const__]] _GLIBCXX_SIMD_INTRINSIC constexpr _TV
  __force_evaluation(_TV __x) noexcept
  {
    // Constant evaluation: skip the asm statements and hand the value back.
    if (__builtin_is_constant_evaluated())
      return __x;
    else
      // Immediately-invoked lambda; __x is captured by reference, so the asm
      // statements below operate on this function's own parameter.
      return [&] {
	if constexpr(__have_sse)
	  {
	    // Vectors of 16 bytes or more are passed as a whole.
	    if constexpr (sizeof(__x) >= 16)
	      {
		asm("" :: "x"(__x));
		asm("" : "+x"(__x));
	      }
	    // A 2-float vector is passed element by element instead.
	    else if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>)
	      {
		asm("" :: "x"(__x[0]), "x"(__x[1]));
		asm("" : "+x"(__x[0]), "+x"(__x[1]));
	      }
	    // Any other _TV is not handled here (__assert_unreachable is
	    // defined outside this snippet).
	    else
	      __assert_unreachable<_TV>();
	  }
	else if constexpr(__have_neon)
	  {
	    asm("" :: "w"(__x));
	    asm("" : "+w"(__x));
	  }
	else if constexpr (__have_power_vmx)
	  {
	    // As in the SSE case, a 2-float vector goes through per element.
	    if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>)
	      {
		asm("" :: "fgr"(__x[0]), "fgr"(__x[1]));
		asm("" : "+fgr"(__x[0]), "+fgr"(__x[1]));
	      }
	    else
	      {
		asm("" :: "v"(__x));
		asm("" : "+v"(__x));
	      }
	  }
	else
	  {
	    // No known vector extension: fall back to the generic "g"
	    // constraint.
	    asm("" :: "g"(__x));
	    asm("" : "+g"(__x));
	  }
	return __x;
      }();
  }

// Returns __x + __y - __y without -fassociative-math optimizing to __x.
// - _TV must be __vector_type_t<floating-point type, N>.
// - _UV must be _TV or floating-point type.
// __plus_minus(__x, __y): computes (__x + __y) - __y, choosing one of the
// following paths.
//
// Reading aid: the line breaks of this snippet were lost in the archive. Each
// preprocessor directive (#if / #else / #endif) originally stood on a line of
// its own, and the "// operations on __x would use the FPU" comment ended
// right after "FPU"; the static_assert that follows it is code.
//
// Paths, in the order they appear below:
//  1. Compiled with clang (__clang__ defined) or with __GCC_IEC_559 > 0:
//     the plain expression (__x + __y) - __y is returned.
//  2. Otherwise, during constant evaluation, or when __builtin_constant_p
//     holds for both __x and __y: again the plain expression.
//  3. Otherwise, only on __i386__ without __SSE_MATH__, and only when
//     sizeof(__x) == 8 (statically asserted to be a 2-float vector): __x is
//     bitcast to a 4-float vector, __plus_minus is called on that, and the
//     result is bitcast back to 2 floats. __y is widened the same way when
//     _UV is the same type as _TV, and passed through unchanged otherwise.
//  4. Otherwise: __force_evaluation(__x + __y) - __y.
//
// NOTE(review): __GCC_IEC_559 > 0 is presumably taken to mean that the
// compiler will not reassociate floating-point operations, which is why
// path 1 needs no workaround -- confirm against the GCC documentation of
// that macro.
template <typename _TV, typename _UV> [[__gnu__::__const__]] _GLIBCXX_SIMD_INTRINSIC constexpr _TV __plus_minus(_TV __x, _UV __y) noexcept { #if defined __clang__ || __GCC_IEC_559 > 0 return (__x + __y) - __y; #else if (__builtin_is_constant_evaluated() || (__builtin_constant_p(__x) && __builtin_constant_p(__y))) return (__x + __y) - __y; #if defined __i386__ && !defined __SSE_MATH__ else if constexpr (sizeof(__x) == 8) { // operations on __x would use the FPU static_assert(is_same_v<_TV, __vector_type_t<float, 2>>); const auto __x4 = __vector_bitcast<float, 4>(__x); if constexpr (is_same_v<_TV, _UV>) return __vector_bitcast<float, 2>( __plus_minus(__x4, __vector_bitcast<float, 4>(__y))); else return __vector_bitcast<float, 2>(__plus_minus(__x4, __y)); } #endif else return __force_evaluation(__x + __y) - __y; #endif } -- ────────────────────────────────────────────────────────────────────────── Dr. Matthias Kretz https://mattkretz.github.io GSI Helmholtz Centre for Heavy Ion Research https://gsi.de std::experimental::simd https://github.com/VcDevel/std-simd ──────────────────────────────────────────────────────────────────────────