On Wednesday, 14 July 2021 14:42:01 CEST H.J. Lu wrote:
> On Wed, Jul 14, 2021 at 12:32 AM Matthias Kretz <m.kr...@gsi.de> wrote:
> > OK?
> > 
> > On Wednesday, 30 June 2021 10:59:28 CEST Matthias Kretz wrote:
> > > Library code, especially in headers, sometimes needs to know how the
> > > compiler interprets / optimizes floating-point types and operations.
> > > This information can be used for additional optimizations or for
> > > ensuring correctness. This change makes -freciprocal-math,
> > > -fno-signed-zeros, -fno-trapping-math, -fassociative-math, and
> > > -frounding-math report their state via corresponding pre-defined macros.
> > > 
> > > Signed-off-by: Matthias Kretz <m.kr...@gsi.de>
> > > 
> > > gcc/testsuite/ChangeLog:
> > >       * gcc.dg/associative-math-1.c: New test.
> > >       * gcc.dg/associative-math-2.c: New test.
> > >       * gcc.dg/no-signed-zeros-1.c: New test.
> > >       * gcc.dg/no-signed-zeros-2.c: New test.
> > >       * gcc.dg/no-trapping-math-1.c: New test.
> > >       * gcc.dg/no-trapping-math-2.c: New test.
> > >       * gcc.dg/reciprocal-math-1.c: New test.
> > >       * gcc.dg/reciprocal-math-2.c: New test.
> > >       * gcc.dg/rounding-math-1.c: New test.
> > >       * gcc.dg/rounding-math-2.c: New test.
> > > 
> > > gcc/c-family/ChangeLog:
> > >       * c-cppbuiltin.c (c_cpp_builtins_optimize_pragma): Define or
> > >       undefine __RECIPROCAL_MATH__, __NO_SIGNED_ZEROS__,
> > >       __NO_TRAPPING_MATH__, __ASSOCIATIVE_MATH__, and
> > >       __ROUNDING_MATH__ according to the new optimization flags.
> > > 
> > > gcc/ChangeLog:
> > >       * cppbuiltin.c (define_builtin_macros_for_compilation_flags):
> > >       Define __RECIPROCAL_MATH__, __NO_SIGNED_ZEROS__,
> > >       __NO_TRAPPING_MATH__, __ASSOCIATIVE_MATH__, and
> > >       __ROUNDING_MATH__ according to their corresponding flags.
> > >       * doc/cpp.texi: Document __RECIPROCAL_MATH__,
> > >       __NO_SIGNED_ZEROS__, __NO_TRAPPING_MATH__, __ASSOCIATIVE_MATH__,
> > >       and __ROUNDING_MATH__.
> > > 
> 
> Hi Hongtao,
> 
> Can this be used to address
> 
> https://gcc.gnu.org/pipermail/gcc/2021-July/236778.html

It should help to determine when a workaround is necessary. I use inline asm 
to implement the workaround. Relevant libstdc++ code (not upstream yet and not 
making use of __ASSOCIATIVE_MATH__ yet):

/*
 * Ensure the expressions leading up to the @p __x argument are evaluated at
 * least once.
 *
 * Example: __force_evaluation(x + y) - y will not optimize to x with
 * -fassociative-math.
 * _TV is expected to be __vector_type_t<floating-point type, N>.
 *
 * During constant evaluation the argument is returned unchanged. Otherwise
 * the value is passed through a pair of empty asm statements (first as an
 * input operand, then as a read-write "+" operand) and the result is
 * returned. The asm template string is empty; only the operand constraints
 * differ per target.
 */
template <typename _TV>
  [[__gnu__::__flatten__, __gnu__::__const__]]
  _GLIBCXX_SIMD_INTRINSIC constexpr
  _TV
  __force_evaluation(_TV __x) noexcept
  {
    // Constant evaluation: nothing to force, hand the value straight back.
    if (__builtin_is_constant_evaluated())
      return __x;
    else
      // The asm statements live in an immediately invoked lambda.
      // NOTE(review): presumably this keeps the asm statements out of the
      // constexpr function body itself - confirm.
      return [&] {
        if constexpr(__have_sse)
          {
            // "x" constraint; per the GCC x86 machine constraints this is an
            // SSE register - confirm against the GCC documentation.
            if constexpr (sizeof(__x) >= 16)
              {
                // Whole vector: first as input, then as read-write operand.
                asm("" :: "x"(__x));
                asm("" : "+x"(__x));
              }
            else if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>)
              {
                // 8-byte vector of two floats: apply the same input /
                // read-write pair to each element individually.
                asm("" :: "x"(__x[0]), "x"(__x[1]));
                asm("" : "+x"(__x[0]), "+x"(__x[1]));
              }
            else
              // No other vector smaller than 16 bytes is handled here.
              // NOTE(review): __assert_unreachable is defined elsewhere;
              // presumably it rejects this instantiation - confirm.
              __assert_unreachable<_TV>();
          }
        else if constexpr(__have_neon)
          {
            // Same pattern with the "w" constraint.
            asm("" :: "w"(__x));
            asm("" : "+w"(__x));
          }
        else if constexpr (__have_power_vmx)
          {
            if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>)
              {
                // Two-float vector: element-wise, with the multi-alternative
                // constraint "fgr".
                asm("" :: "fgr"(__x[0]), "fgr"(__x[1]));
                asm("" : "+fgr"(__x[0]), "+fgr"(__x[1]));
              }
            else
              {
                // Any other vector: whole value with the "v" constraint.
                asm("" :: "v"(__x));
                asm("" : "+v"(__x));
              }
          }
        else
          {
            // Fallback for all other targets: generic "g" constraint.
            asm("" :: "g"(__x));
            asm("" : "+g"(__x));
          }
        // __x was captured by reference, so this is the value after the
        // read-write asm operand.
        return __x;
      }();
  }

// Returns __x + __y - __y without -fassociative-math optimizing to __x.
// - _TV must be __vector_type_t<floating-point type, N>.
// - _UV must be _TV or floating-point type.
//
// The plain expression (__x + __y) - __y is used when compiling with clang,
// when __GCC_IEC_559 > 0, during constant evaluation, or when both arguments
// are compile-time constants according to __builtin_constant_p. In all other
// cases the sum is routed through __force_evaluation before subtracting.
template <typename _TV, typename _UV>
  [[__gnu__::__const__]]
  _GLIBCXX_SIMD_INTRINSIC constexpr
  _TV
  __plus_minus(_TV __x, _UV __y) noexcept
  {
    // NOTE(review): __GCC_IEC_559 > 0 presumably means the compiler is in an
    // IEEE-conforming floating-point mode, so no workaround is needed -
    // confirm against the GCC predefined-macro documentation.
#if defined __clang__ || __GCC_IEC_559 > 0
    return (__x + __y) - __y;
#else
    // Constant evaluation or constant operands: use the plain expression.
    if (__builtin_is_constant_evaluated()
          || (__builtin_constant_p(__x) && __builtin_constant_p(__y)))
      return (__x + __y) - __y;
#if defined __i386__ && !defined __SSE_MATH__
    else if constexpr (sizeof(__x) == 8)
      { // operations on __x would use the FPU
        // Only the two-float vector is expected to be 8 bytes wide here.
        static_assert(is_same_v<_TV, __vector_type_t<float, 2>>);
        // Widen to a four-float vector, recurse on that, then narrow the
        // result back to two floats.
        const auto __x4 = __vector_bitcast<float, 4>(__x);
        if constexpr (is_same_v<_TV, _UV>)
          // __y is a vector of the same type: widen it as well.
          return __vector_bitcast<float, 2>(
                   __plus_minus(__x4, __vector_bitcast<float, 4>(__y)));
        else
          // __y has a different type from _TV (a floating-point type per
          // the contract above): pass it through unchanged.
          return __vector_bitcast<float, 2>(__plus_minus(__x4, __y));
      }
#endif
    else
      // General runtime path: force the sum to be evaluated before __y is
      // subtracted again.
      return __force_evaluation(__x + __y) - __y;
#endif
  }


-- 
──────────────────────────────────────────────────────────────────────────
 Dr. Matthias Kretz                           https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research               https://gsi.de
 std::experimental::simd              https://github.com/VcDevel/std-simd
──────────────────────────────────────────────────────────────────────────



Reply via email to