Only some operations on std::shared_ptr and std::weak_ptr must
synchronize with each other, namely, destructions and calls to member
reset(). In the atomic implementation, synchronization is accomplished
with release/acquire memory-ordering operations on refcount decrements.
Copies, involving refcount increments, do not synchronize.

On TSO architectures like x86-64, there is little observable difference.
But on architectures with weaker memory models like aarch64, the
compiler may emit faster instruction sequences for atomic increments
without memory ordering than with.

        PR libstdc++/111589

libstdc++-v3/ChangeLog:

        PR libstdc++/111589
        * include/bits/shared_ptr_base.h (_Sp_counted_base::_M_add_ref_copy):
        Use __exchange_and_add_relaxed_dispatch instead of
        __exchange_and_add_dispatch.
        (_Sp_counted_base::_M_weak_add_ref): Likewise.
        (_Sp_counted_base::_M_add_ref_lock_nothrow): Use __ATOMIC_RELAXED
        instead of __ATOMIC_ACQ_REL for the success ordering.
        * include/ext/atomicity.h (__exchange_and_add_relaxed): New function.
        (__atomic_add_relaxed): New function.
        (__exchange_and_add_relaxed_dispatch): New function.
        (__atomic_add_relaxed_dispatch): New function.
---
 libstdc++-v3/include/bits/shared_ptr_base.h | 11 ++++--
 libstdc++-v3/include/ext/atomicity.h        | 43 +++++++++++++++++++++
 2 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/bits/shared_ptr_base.h b/libstdc++-v3/include/bits/shared_ptr_base.h
index 88e0f4d58c6..1aa0d4faa85 100644
--- a/libstdc++-v3/include/bits/shared_ptr_base.h
+++ b/libstdc++-v3/include/bits/shared_ptr_base.h
@@ -148,7 +148,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       // Increment the use count (used when the count is greater than zero).
       void
       _M_add_ref_copy()
-      { _S_chk(__gnu_cxx::__exchange_and_add_dispatch(&_M_use_count, 1)); }
+      {
+       _S_chk(__gnu_cxx::__exchange_and_add_relaxed_dispatch(&_M_use_count,
+                                                             1));
+      }
 
       // Increment the use count if it is non-zero, throw otherwise.
       void
@@ -204,7 +207,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        // _M_weak_count can always use negative values because it cannot be
        // observed by users (unlike _M_use_count). See _S_chk for details.
        constexpr _Atomic_word __max = -1;
-       if (__gnu_cxx::__exchange_and_add_dispatch(&_M_weak_count, 1) == __max)
+       auto __r
+         = __gnu_cxx::__exchange_and_add_relaxed_dispatch(&_M_weak_count, 1);
+       if (__r == __max)
          [[__unlikely__]] __builtin_trap();
       }
 
@@ -358,7 +363,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          // long as it's not changed meanwhile.
        }
       while (!__atomic_compare_exchange_n(&_M_use_count, &__count, __count + 1,
-                                         true, __ATOMIC_ACQ_REL,
+                                         true, __ATOMIC_RELAXED,
                                          __ATOMIC_RELAXED));
       _S_chk(__count);
       return true;
diff --git a/libstdc++-v3/include/ext/atomicity.h b/libstdc++-v3/include/ext/atomicity.h
index ecbe376a687..6ecda307990 100644
--- a/libstdc++-v3/include/ext/atomicity.h
+++ b/libstdc++-v3/include/ext/atomicity.h
@@ -74,12 +74,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  inline void
  __attribute__((__always_inline__))
   __atomic_add(volatile _Atomic_word* __mem, int __val)
   { __atomic_fetch_add(__mem, __val, __ATOMIC_ACQ_REL); }
+
+  inline _Atomic_word
+  __attribute__((__always_inline__))
+  __exchange_and_add_relaxed(volatile _Atomic_word* __mem, int __val)
+  { return __atomic_fetch_add(__mem, __val, __ATOMIC_RELAXED); }
+
+  inline void
+  __attribute__((__always_inline__))
+  __atomic_add_relaxed(volatile _Atomic_word* __mem, int __val)
+  { __atomic_fetch_add(__mem, __val, __ATOMIC_RELAXED); }
 #else // Defined in config/cpu/.../atomicity.h
   _Atomic_word
   __exchange_and_add(volatile _Atomic_word*, int) _GLIBCXX_NOTHROW;
 
   void
   __atomic_add(volatile _Atomic_word*, int) _GLIBCXX_NOTHROW;
+
+  // Fallback to acq_rel when builtins are not available.
+  inline _Atomic_word
+  __attribute__((__always_inline__))
+  __exchange_and_add_relaxed(volatile _Atomic_word* __mem, int __val)
+  { return __exchange_and_add(__mem, __val); }
+
+  inline void
+  __attribute__((__always_inline__))
+  __atomic_add_relaxed(volatile _Atomic_word* __mem, int __val)
+  { __atomic_add(__mem, __val); }
 #endif
 
 #if __cplusplus < 201103L
@@ -154,6 +175,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       __atomic_add(__mem, __val);
   }
 
+  // Relaxed variants for reference count increments where acquire/release
+  // semantics are not required. Decrements still need stronger ordering.
+  inline _Atomic_word
+  __attribute__ ((__always_inline__))
+  __exchange_and_add_relaxed_dispatch(_Atomic_word* __mem, int __val)
+  {
+    if (__is_single_threaded())
+      return __exchange_and_add_single(__mem, __val);
+    else
+      return __exchange_and_add_relaxed(__mem, __val);
+  }
+
+  inline void
+  __attribute__ ((__always_inline__))
+  __atomic_add_relaxed_dispatch(_Atomic_word* __mem, int __val)
+  {
+    if (__is_single_threaded())
+      __atomic_add_single(__mem, __val);
+    else
+      __atomic_add_relaxed(__mem, __val);
+  }
+
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace
 
-- 
2.47.3

Reply via email to