On Mon, 12 Jan 2026 at 08:28, Tomasz Kaminski <[email protected]> wrote:
>
>
>
> On Sun, Jan 11, 2026 at 5:34 PM Jonathan Wakely <[email protected]> wrote:
>>
>> This defines __platform_wait, __platform_notify, and
>> __platform_wait_until for FreeBSD, making use of the _umtx_op syscall.
>>
>> The Linux versions of those functions only support 32-bit integers, but
>> the FreeBSD versions use the syscall for both 32-bit and 64-bit types,
>> as the _umtx_op supports both.
>>
>> libstdc++-v3/ChangeLog:
>>
>> PR libstdc++/120527
>> * include/bits/atomic_wait.h [__FreeBSD__] (__platform_wait_t):
>> Define typedef.
>> [__FreeBSD__] (__platform_wait_uses_type): Define variable
>> template.
>> * src/c++20/atomic.cc [__FreeBSD__] (_GLIBCXX_HAVE_PLATFORM_WAIT)
>> (__platform_wait, __platform_notify, __platform_wait_until):
>> Define.
>> (__platform_load): Define.
>> ---
>>
>> v4: Tomasz noticed that __spin_impl assumes the waitable is always a
>> __platform_wait_t, but the FreeBSD patch enables the fast path for both
>> 32-bit and 64-bit integers, so we need to adjust how __spin_impl loads
>> the current value of the waitable. This adds a __platform_load function,
>> which does an atomic load of the right size. For Linux, it's always
>> loading from int* but for FreeBSD it might be a 4B or 8B type.
>
> I think this should go into the patch description, or preferably follow what
> you did in previous patches and put the introduction of __platform_load into
> a separate commit, explaining why we need it.
I'll add it to the commit msg, that would be better.
>
> Outside of that LGTM.
>>
>>
>> Tested x86_64-linux and x86_64-freebsd.
>>
>> I see a few unexplained FAILs when testing with -m32 on freebsd, but
>> they're already present on trunk so aren't caused by this patch. This
>> passes all tests with -m64 (and the changes should only affect 64-bit
>> mode, because of the __SIZEOF_LONG__ == 8 check).
>>
>> libstdc++-v3/include/bits/atomic_wait.h | 11 ++++
>> libstdc++-v3/src/c++20/atomic.cc | 78 ++++++++++++++++++++++++-
>> 2 files changed, 87 insertions(+), 2 deletions(-)
>>
>> diff --git a/libstdc++-v3/include/bits/atomic_wait.h b/libstdc++-v3/include/bits/atomic_wait.h
>> index eff1be604eb4..e8487390ecb5 100644
>> --- a/libstdc++-v3/include/bits/atomic_wait.h
>> +++ b/libstdc++-v3/include/bits/atomic_wait.h
>> @@ -69,6 +69,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>> inline constexpr bool __platform_wait_uses_type
>> = __detail::__waitable<_Tp>
>> && sizeof(_Tp) == sizeof(int) && alignof(_Tp) >= 4;
>> +#elif defined __FreeBSD__ && __SIZEOF_LONG__ == 8
>> + namespace __detail
>> + {
>> + using __platform_wait_t = __UINT64_TYPE__;
>> + inline constexpr size_t __platform_wait_alignment = 8;
>> + }
>> + template<typename _Tp>
>> + inline constexpr bool __platform_wait_uses_type
>> + = __detail::__waitable<_Tp>
>> + && ((sizeof(_Tp) == 4 && alignof(_Tp) >= 4)
>> + || (sizeof(_Tp) == 8 && alignof(_Tp) >= 8));
>> #else
>> // define _GLIBCX_HAVE_PLATFORM_WAIT and implement __platform_wait()
>> // and __platform_notify() if there is a more efficient primitive supported
>> diff --git a/libstdc++-v3/src/c++20/atomic.cc b/libstdc++-v3/src/c++20/atomic.cc
>> index 3167592786e1..215f1212dd99 100644
>> --- a/libstdc++-v3/src/c++20/atomic.cc
>> +++ b/libstdc++-v3/src/c++20/atomic.cc
>> @@ -27,7 +27,7 @@
>> #if __glibcxx_atomic_wait
>> #include <atomic>
>> #include <bits/atomic_timed_wait.h>
>> -#include <cstdint> // uint32_t, uint64_t
>> +#include <cstdint> // uint32_t, uint64_t, uintptr_t
>> #include <climits> // INT_MAX
>> #include <cerrno> // errno, ETIMEDOUT, etc.
>> #include <bits/std_mutex.h> // std::mutex, std::__condvar
>> @@ -39,6 +39,11 @@
>> # include <unistd.h>
>> # include <sys/time.h> // timespec
>> # define _GLIBCXX_HAVE_PLATFORM_WAIT 1
>> +#elif defined __FreeBSD__ && __FreeBSD__ >= 11 && __SIZEOF_LONG__ == 8
>> +# include <sys/types.h>
>> +# include <sys/umtx.h>
>> +# include <sys/time.h>
>> +# define _GLIBCXX_HAVE_PLATFORM_WAIT 1
>> #endif
>>
>> #pragma GCC diagnostic ignored "-Wmissing-field-initializers"
>> @@ -87,6 +92,13 @@ namespace
>> __wait_clock_t::time_point timeout,
>> int obj_size) = delete;
>>
>> + // This is needed even when we don't have __platform_wait
>> + [[gnu::always_inline]]
>> + inline __wait_value_type
>> + __platform_load(const __platform_wait_t* addr, int memory_order,
>> + int /* obj_sz */) noexcept
>> + { return __atomic_load_n(addr, memory_order); }
>> +
>> #elif defined _GLIBCXX_HAVE_LINUX_FUTEX
>>
>> const int futex_private_flag = 128;
>> @@ -136,6 +148,68 @@ namespace
>> }
>> return true;
>> }
>> +
>> + [[gnu::always_inline]]
>> + inline __wait_value_type
>> + __platform_load(const int* addr, int order, int /* obj_sz */) noexcept
>
> I checked that we use int for other __platform functions with futex patch.
Yes, that's intentional so that we would get a compilation error if we
passed a __wait_value_type* or some other type here. The argument
should be __platform_wait_t* and for Linux that should be int*.
>>
>> + { return __atomic_load_n(addr, order); }
>> +
>> +#elif defined __FreeBSD__ && __SIZEOF_LONG__ == 8
>> + [[gnu::always_inline]]
>> + inline int
>> + wait_op(int obj_sz) noexcept
>> + { return obj_sz == sizeof(unsigned) ? UMTX_OP_WAIT_UINT : UMTX_OP_WAIT; }
>> +
>> + void
>> + __platform_wait(const void* addr, uint64_t val, int obj_sz) noexcept
>> + {
>> + if (_umtx_op(const_cast<void*>(addr), wait_op(obj_sz), val,
>> + nullptr, nullptr))
>> + if (errno != EINTR)
>> + __throw_system_error(errno);
>> + }
>> +
>> + void
>> + __platform_notify(const void* addr, bool all, int /* obj_sz */) noexcept
>> + {
>> + const int count = all ? INT_MAX : 1;
>> + _umtx_op(const_cast<void*>(addr), UMTX_OP_WAKE, count, nullptr, nullptr);
>> + }
>> +
>> + // returns true if wait ended before timeout
>> + bool
>> + __platform_wait_until(const void* addr, uint64_t val,
>> + const __wait_clock_t::time_point& atime,
>> + int obj_sz) noexcept
>> + {
>> + struct _umtx_time timeout = {
>> + ._timeout = chrono::__to_timeout_timespec(atime),
>> + ._flags = UMTX_ABSTIME,
>> + ._clockid = CLOCK_MONOTONIC
>> + };
>> + // _umtx_op hangs if timeout._timeout is {0, 0}
>> + if (atime.time_since_epoch() < chrono::nanoseconds(1))
>> + return false;
>> + constexpr uintptr_t timeout_sz = sizeof(timeout);
>> + if (_umtx_op(const_cast<void*>(addr), wait_op(obj_sz), val,
>> + (void*)timeout_sz, &timeout))
>> + {
>> + if (errno == ETIMEDOUT)
>> + return false;
>> + if (errno != EINTR)
>> + __throw_system_error(errno);
>> + }
>> + return true;
>> + }
>> +
>> + [[gnu::always_inline]]
>> + inline __wait_value_type
>> + __platform_load(const void* addr, int order, int obj_sz) noexcept
>> + {
>> + if (obj_sz == sizeof(long))
>> + return __atomic_load_n(static_cast<const long*>(addr), order);
>> + return __atomic_load_n(static_cast<const unsigned*>(addr), order);
>> + }
>> #endif // HAVE_PLATFORM_WAIT
>>
>> // The state used by atomic waiting and notifying functions.
>> @@ -259,7 +333,7 @@ namespace
>> __wait_value_type wval;
>> for (auto i = 0; i < atomic_spin_count; ++i)
>> {
>> - wval = __atomic_load_n(addr, args._M_order);
>> + wval = __platform_load(addr, args._M_order, args._M_obj_size);
>
> I was wondering whether we should have a separate __spin_impl, which would
> hoist the check outside of the loop, but this is a busy wait anyway, and the
> compiler sees the __platform_load body. And the __platform_load solution does
> not add any `if` on platforms where waiting on only a single size is
> supported.
Yes, exactly. For Linux we don't need a runtime branch on _M_obj_size
because it's always 4.
Only the targets that support waiting on different sizes need the
extra branch in __platform_load.
>>
>> if (wval != args._M_old)
>> return { ._M_val = wval, ._M_has_val = true, ._M_timeout = false };
>> if (i < atomic_spin_count_relax)
>> --
>> 2.52.0
>>