Changes in v15:
* Remove redundant <cstdint>, "uniform_int_dist.h".
* Improve comment text.
* Rename template parameter _Int => _UInt to better reflect usage.
* Rename implementation fns to __generate_canonical_{pow2,any}.
* Define URBG range, other consts with explicit types to prevent
promotion.
* Name _GLIBCXX_GENERATE_CANONICAL_STRICT per convention.
* Simplify logic for the strict-conformance case.
* Initialize local variable __sum at definition.
* Format 'for' and 'if' bodies per convention.
* Make __gen_con_is_float a variable template.
* Static-assert non-support of float16_t (IEEE half-precision),
and #if-guard use cases requiring __int128 when it is not available.
* Give digits precision template argument the obvious default.
* Test C++-11 through -26, built both -m64 and -m32.
Changes in v14:
* Remove dependence on __STRICT_ANSI__, which is turned on by
"-std=c++26".
Changes in v13:
* Spell __STRICT_ANSI__ correctly.
Changes in v12:
* use __builtin_popcountg() which works on all unsigned integer types.
Changes in v11:
* Add doxygen entry for generate_canonical.
Changes in v10:
* Rewrite entirely after consultation with P0952 authors.
* Require radix2 for all float types.
* Perform all intermediate calculations on integers.
* Use one implementation for all C++11 to C++26.
* Optimize for each digits resolution.
* Test also with irregular URBG returning 0..999999.
* Work under Clang, for -std=c++14 and up.
* By default and where possible (which is usually), preserve more
entropy than the naive Standard prescription.
Implement P0952R2 "A new specification for std::generate_canonical".
It has us start over, using new entropy, if the naïve generation of
a value in 0..1 comes up not-less-than 1, which occurs too rarely
for the change to measurably affect performance.
The fix is extended to all of C++11 to 26. It dispatches to variations
optimized for each bit depth, using minimal integer sizes for exact
intermediate calculations, and prefers shift operations over
multiply/divide where practical.
This patch adds thorough tests for statistical properties. It adds new
static asserts on template argument requirements where supported.
It adds a test using a non-optimal RNG that yields values 0..999999.
A test for the case addressed in P0952 already appears in 64351.cc.
This change amounts to a different resolution for PR64351 and LWG 2524.
libstdc++-v3/ChangeLog:
PR libstdc++/119739
* include/bits/random.tcc: Add generate_canonical impl for C++26.
*
testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc:
Adapt test for both pre- and post- C++26.
*
testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc:
Test for float and double from 32-bit RNG, including 1.0 do-over.
---
libstdc++-v3/include/bits/random.tcc | 252 ++++++++++++++++--
.../operators/64351.cc | 24 +-
.../operators/gencanon.cc | 165 ++++++++++++
3 files changed, 392 insertions(+), 49 deletions(-)
create mode 100644
libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc
diff --git a/libstdc++-v3/include/bits/random.tcc
b/libstdc++-v3/include/bits/random.tcc
index 53ccacb2e38..687bcfb0586 100644
--- a/libstdc++-v3/include/bits/random.tcc
+++ b/libstdc++-v3/include/bits/random.tcc
@@ -3349,43 +3349,237 @@ namespace __detail
}
}
- template<typename _RealType, size_t __bits,
- typename _UniformRandomNumberGenerator>
- _RealType
- generate_canonical(_UniformRandomNumberGenerator& __urng)
- {
- static_assert(std::is_floating_point<_RealType>::value,
- "template argument must be a floating point type");
-
- const size_t __b
- = std::min(static_cast<size_t>(std::numeric_limits<_RealType>::digits),
- __bits);
- const long double __r = static_cast<long double>(__urng.max())
- - static_cast<long double>(__urng.min()) + 1.0L;
- const size_t __log2r = std::log(__r) / std::log(2.0L);
- const size_t __m = std::max<size_t>(1UL,
- (__b + __log2r - 1UL) / __log2r);
- _RealType __ret;
- _RealType __sum = _RealType(0);
- _RealType __tmp = _RealType(1);
- for (size_t __k = __m; __k != 0; --__k)
+// [rand.util.canonical]
+// generate_canonical(RNG&)
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wc++14-extensions" // for variable templates
+#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
+
+ // This version is used when URBG::max()-URBG::min() is a power of
+ // two less 1, the norm in real programs. It works by calling urng()
+ // as many times as needed to fill the target mantissa, accumulating
+ // entropy into an integer value, converting that to the float type,
+ // and then dividing by the range of the integer value (a power of 2,
+ // so only adjusts the exponent) to produce a result in [0..1]. In
+ // case of an exact 1.0 result, we re-try.
+ //
+ // It needs to work even when the integer type used is only as big
+ // as the float mantissa, such as uint64_t for long double. So,
+ // commented-out lines below represent computations the Standard
+ // prescribes but cannot be performed, or are not used. Names are
+ // chosen to match the description in the Standard.
+ //
+ // When the result is close to zero, the strict Standard-prescribed
+ // calculation may leave more low-order zeros in the mantissa than is
+ // usually necessary. When spare entropy has been extracted, as is
+ // usual for float and double, some or all of the spare entropy can
+ // commonly be pulled into the result for better randomness.
+ //
+ // When k calls to urng() yield more bits of entropy log2_Rk_max than
+ // fit into UInt, we discard some of it by overflowing, which is OK.
+ // On converting the integer representation of the sample to the float
+ // value, we must divide out the possibly-truncated size log2_Rk.
+ //
+ // This implementation works with std::bfloat16, which can exactly
+ // represent 2^32, but not std::float16_t limited to 2^15.
+
+ template<typename _RealT, typename _UInt, size_t __d, typename _URBG>
+ _RealT
+ __generate_canonical_pow2(_URBG& __urng)
+ {
+ // Parameter __d is the actual target number of digits.
+ // const _UInt __r = 2;
+ using _RNG_t = decltype(_URBG::max());
+ const _RNG_t __rng_range = _URBG::max() - _URBG::min();
+ const _UInt __uint_range = ~_UInt(0);
+ // const _UInt __R = _UInt(__rng_range) + 1; // May wrap to 0.
+ const auto __log2_R = __builtin_popcountg(__rng_range);
+ const auto __log2_uint_max = __builtin_popcountg(__uint_range);
+ // const _UInt __rd = _UInt(1) << __d; // Could overflow, UB.
+ const unsigned __k = (__d + __log2_R - 1) / __log2_R;
+ const unsigned __log2_Rk_max = __k * __log2_R;
+ const unsigned __log2_Rk = // Bits of entropy actually obtained.
+ __log2_uint_max < __log2_Rk_max ? __log2_uint_max : __log2_Rk_max;
+ static_assert(__log2_Rk >= __d);
+ // const _UInt __Rk = _UInt(1) << __log2_Rk; // Likely overflows, UB.
+ constexpr _RealT __Rk = _RealT(_UInt(1) << (__log2_Rk - 1)) * 2.0;
+#if defined(_GLIBCXX_GENERATE_CANONICAL_STRICT)
+ const unsigned __log2_x = __log2_Rk - __d; // # of spare entropy bits.
+#else
+ const unsigned __log2_x = 0;
+#endif
+ const _UInt __x = _UInt(1) << __log2_x;
+ constexpr _RealT __rd = __Rk / __x;
+ // const _UInt __xrd = __x << __d; // Could overflow.
+
+ while (true)
+ {
+ _UInt __sum = _UInt(__urng() - _URBG::min());
+ for (unsigned __i = __k - 1, __shift = 0; __i > 0; --__i)
{
- __sum += _RealType(__urng() - __urng.min()) * __tmp;
- __tmp *= __r;
+ __shift += __log2_R;
+ __sum |= _UInt(__urng() - _URBG::min()) << __shift;
}
- __ret = __sum / __tmp;
- if (__builtin_expect(__ret >= _RealType(1), 0))
+ const _RealT __ret = _RealT(__sum >> __log2_x) / __rd;
+ if (__ret < _RealT(1.0))
+ return __ret;
+ }
+ }
+
+ template <typename _UInt>
+ constexpr _UInt
+ __gen_con_pow(_UInt __base, size_t __exponent)
+ {
+ _UInt __prod = __base;
+ while (--__exponent != 0) __prod *= __base;
+ return __prod;
+ }
+
+ template <typename _UInt>
+ constexpr unsigned
+ __gen_con_rng_calls_needed(_UInt __R, _UInt __rd)
+ {
+ unsigned __i = 1;
+ for (auto __Ri = __R; __Ri < __rd; __Ri *= __R)
+ ++__i;
+ return __i;
+ }
+
+ // This version must be used when the range of possible RNG results,
+ // URBG::max()-URBG::min(), is not a power of two less one. The UInt
+ // type passed must be big enough to represent Rk, R^k, a power of R
+ // (the range of values produced by the rng) up to twice the length
+ // of the mantissa.
+
+ template<typename _RealT, typename _UInt, size_t __d, typename _URBG>
+ _RealT
+ __generate_canonical_any(_URBG& __urng)
+ {
+ // Names below are chosen to match the description in the Standard.
+ // Parameter d is the actual target number of bits.
+ const _UInt __r = 2;
+ const _UInt __R = _UInt(_URBG::max() - _URBG::min()) + 1;
+ const _UInt __rd = __gen_con_pow(__r, __d);
+ const unsigned __k = __gen_con_rng_calls_needed(__R, __rd);
+ const _UInt __Rk = __gen_con_pow(__R, __k);
+ const _UInt __x = __Rk/__rd;
+
+ while (true)
+ {
+ _UInt __Ri = 1, __sum = __urng() - _URBG::min();
+ for (int __i = __k - 1; __i > 0; --__i)
{
-#if _GLIBCXX_USE_C99_MATH_FUNCS
- __ret = std::nextafter(_RealType(1), _RealType(0));
+ __Ri *= __R;
+ __sum += (__urng() - _URBG::min()) * __Ri;
+ }
+ const _RealT __ret = _RealT(__sum / __x) / __rd;
+ if (__ret < _RealT(1.0))
+ return __ret;
+ }
+ }
+
+#if !defined(_GLIBCXX_GENERATE_CANONICAL_STRICT)
+ template <typename _Tp>
+ const bool __gen_con_is_float_v = is_floating_point<_Tp>::value;
+# define _GLIBCXX_DEF_LARGE = -1u
+#else
+ template <typename _Tp> const bool __gen_con_is_float_v = false;
+ template <> const bool __gen_con_is_float_v<float> = true;
+ template <> const bool __gen_con_is_float_v<double> = true;
+ template <> const bool __gen_con_is_float_v<long double> = true;
+# define _GLIBCXX_DEF_LARGE /* no default setting */
+#endif
+
+ /** Produce a random floating-point value in the range [0..1)
+ *
+ * The result of `std::generate_canonical<RealT,digits>(urng)` is a
+ * random floating-point value of type `RealT` in the range [0..1),
+ * using entropy provided by the uniform random bit generator `urng`.
+ * A value for `digits` may be passed to limit the precision of the
+ * result to so many bits, but normally `-1u` is passed to get the
+ * native precision of `RealT`. As many `urng()` calls are made as
+ * needed to obtain the required entropy. On rare occasions, more
+ * `urng()` calls are used. It is fastest when the value of
+ * `URBG::max()` is a power of two less one, such as from a
+ * `std::philox4x32` (for `float`) or `philox4x64` (for `double`).
+ *
+ * @since C++11
+ */
+ template<typename _RealT, size_t __digits _GLIBCXX_DEF_LARGE, typename _URBG>
+ _RealT
+ generate_canonical(_URBG& __urng)
+ {
+#if __cpp_lib_concepts >= 201907
+ static_assert(requires { [](uniform_random_bit_generator auto) {}(
+ static_cast<std::remove_cv_t<_URBG>&&>(__urng)); },
+ "argument must meet uniform_random_bit_generator requirements");
+#endif
+#if __cpp_lib_concepts >= 201806
+ static_assert(requires { _RealT(_URBG::max()); },
+ "template float argument must be coercible from unsigned");
+#endif
+ static_assert(__gen_con_is_float_v<_RealT>,
+ "template argument must be floating point");
+ static_assert(__digits != 0 && _URBG::max() > _URBG::min(),
+ "random samples with 0 bits are not meaningful");
+ static_assert(std::numeric_limits<_RealT>::radix == 2,
+ "only base-2 float types are supported");
+# if defined(__STDCPP_FLOAT16_T__)
+ static_assert(! is_same_v<_RealT, _Float16>,
+ "float16_t type is not supported, consider using bfloat16_t");
+#endif
+
+ using _U32 = __UINT32_TYPE__;
+ using _U64 = __UINT64_TYPE__;
+ using _RNG_t = decltype(_URBG::max());
+ const unsigned __d_max = std::numeric_limits<_RealT>::digits;
+ const unsigned __d = __digits > __d_max ? __d_max : __digits;
+ // Note, this works even when (__range + 1) overflows:
+ const auto is_power_of_2_less_1 = [](_RNG_t __range)
+ { return ((__range + 1) & __range) == 0; };
+
+ // If the RNG range is a power of 2 less 1, the float type mantissa
+ // is enough bits. If not, we need more.
+ if constexpr (is_power_of_2_less_1(_URBG::max() - _URBG::min()))
+ {
+ if constexpr (__d <= 32)
+ return __generate_canonical_pow2<_RealT, _U32, __d>(__urng);
+ else if constexpr (__d <= 64)
+ return __generate_canonical_pow2<_RealT, _U64, __d>(__urng);
+ else // Accommodate double double or float128.
+ {
+#if defined(__SIZEOF_INT128__)
+ return __generate_canonical_pow2<
+ _RealT, unsigned __int128, __d>(__urng);
+#else
+ static_assert(false,
+ "float precision >64 requires __int128 support");
+#endif
+ }
+ }
+ else // Need up to 2x bits.
+ {
+ if constexpr (__d <= 32)
+ return __generate_canonical_any<_RealT, _U64, __d>(__urng);
+ else
+ {
+#if defined(__SIZEOF_INT128__)
+ static_assert(__d <= 64,
+ "irregular RNG with float precision >64 is not supported");
+ return
+ __generate_canonical_any<_RealT, unsigned __int128, __d>(__urng);
#else
- __ret = _RealType(1)
- - std::numeric_limits<_RealType>::epsilon() / _RealType(2);
+ static_assert(false,
+ "irregular RNG with float precision >32 requires __int128 support");
#endif
}
- return __ret;
+ }
}
+#undef _GLIBCXX_DEF_LARGE
+#pragma GCC diagnostic pop
+
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace
diff --git
a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc
index 3c0cb8bbd49..dbb95a42d85 100644
---
a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc
+++
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/64351.cc
@@ -21,21 +21,6 @@
#include <random>
#include <testsuite_hooks.h>
-// libstdc++/64351
-void
-test01()
-{
- std::mt19937 rng(8890);
- std::uniform_real_distribution<float> dist;
-
- rng.discard(30e6);
- for (long i = 0; i < 10e6; ++i)
- {
- auto n = dist(rng);
- VERIFY( n != 1.f );
- }
-}
-
// libstdc++/63176
void
test02()
@@ -47,19 +32,18 @@ test02()
std::mt19937 rng2{rng};
for (int i = 0; i < 20; ++i)
{
- float n =
- std::generate_canonical<float, std::numeric_limits<float>::digits>(rng);
+ float n = std::generate_canonical<float>(rng);
VERIFY( n != 1.f );
- // PR libstdc++/80137
rng2.discard(1);
- VERIFY( rng == rng2 );
}
+ // PR libstdc++/80137
+ rng2.discard(1); // account for a 1.0 generated and discarded.
+ VERIFY( rng == rng2 );
}
int
main()
{
- test01();
test02();
}
diff --git
a/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc
new file mode 100644
index 00000000000..0e6de954dd2
--- /dev/null
+++
b/libstdc++-v3/testsuite/26_numerics/random/uniform_real_distribution/operators/gencanon.cc
@@ -0,0 +1,165 @@
+// { dg-do run { target { c++11 && { ! simulator } } } }
+
+#include <random>
+#include <limits>
+#include <type_traits>
+#include <cmath>
+#include <testsuite_hooks.h>
+#include <array>
+
+struct local_rng : std::mt19937
+{
+ static constexpr std::uint64_t min() { return 0; }
+ static constexpr std::uint64_t max() { return 999999; }
+ std::uint64_t operator()()
+ { return static_cast<std::mt19937&>(*this)() % (max() + 1); }
+ local_rng(std::mt19937 const& arg) : std::mt19937(arg) {}
+};
+
+// Verify P0952R9 implementation requiring a second round-trip
+// if first yields exactly 1. In this test, the RNG delivering
+// 32 bits per call is seeded such that this occurs once on the
+// sixth iteration for float, and not at all for double.
+// However, each double iteration requires two calls to the RNG.
+
+template <typename T, typename RNG>
+void
+test01(RNG& rng, RNG& rng2,
+ int& deviation, int& max, int& rms, int& zero, int& skips)
+{
+ const auto size = 1000000, buckets = 100;
+ std::array<int, buckets> histo{};
+ for (auto i = 0; i != size; ++i) {
+ T sample = std::generate_canonical<T>(rng);
+ VERIFY(sample >= T(0.0));
+ VERIFY(sample < T(1.0)); // libstdc++/64351
+ if (sample == T(0.0)) {
+ ++zero;
+ }
+ auto bucket = static_cast<int>(std::floor(sample * buckets));
+ ++histo[bucket];
+ rng2.discard(1);
+ if (rng != rng2) {
+ ++skips;
+ rng2.discard(1);
+ VERIFY(rng == rng2);
+ }
+ }
+ int devsquare = 0;
+ for (int i = 0; i < buckets; ++i) {
+ const auto expected = size / buckets;
+ auto count = histo[i];
+ auto diff = count - expected;
+ if (diff < 0) diff = -diff;
+ deviation += diff;
+ devsquare += diff * diff;
+ if (diff > max) max = diff;
+ }
+ rms = std::sqrt(devsquare);
+}
+
+// This one is for use with local_rng
+template <typename T, typename RNG>
+void
+test02(RNG& rng, RNG& rng2,
+ int& deviation, int& max, int& rms, int& zero, int& skips)
+{
+ const auto size = 1000000, buckets = 100;
+ std::array<int, buckets> histo{};
+ for (auto i = 0; i != size; ++i) {
+ T sample = std::generate_canonical<T>(rng);
+ VERIFY(sample >= T(0.0));
+ VERIFY(sample < T(1.0)); // libstdc++/64351
+ if (sample == T(0.0)) {
+ ++zero;
+ }
+ auto bucket = static_cast<int>(std::floor(sample * buckets));
+ ++histo[bucket];
+ rng2.discard(2);
+ if (rng != rng2) {
+ ++skips;
+ rng2.discard(2);
+ VERIFY(rng == rng2);
+ }
+ }
+ int devsquare = 0;
+ for (int i = 0; i < buckets; ++i) {
+ const auto expected = size / buckets;
+ auto count = histo[i];
+ auto diff = count - expected;
+ if (diff < 0) diff = -diff;
+ deviation += diff;
+ devsquare += diff * diff;
+ if (diff > max) max = diff;
+ }
+ rms = std::sqrt(devsquare);
+}
+
+int
+main()
+{
+ std::mt19937 rng(8890);
+ std::seed_seq sequence{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+ rng.seed(sequence);
+ rng.discard(12 * 629143);
+
+ { // float
+ int deviation{}, max{}, rms{}, zero{}, skips{};
+ auto rng2{rng};
+ auto rng3{rng};
+ test01<float>(rng2, rng3, deviation, max, rms, zero, skips);
+
+ if (std::numeric_limits<float>::is_iec559) {
+ VERIFY(deviation == 7032);
+ VERIFY(max == 276);
+ VERIFY(rms == 906);
+ VERIFY(zero == 0);
+ }
+ VERIFY(skips == 1);
+ }
+ { // double
+ int deviation{}, max{}, rms{}, zero{}, skips{};
+ auto rng2{rng};
+ auto rng3{rng};
+ test01<double>(rng2, rng3, deviation, max, rms, zero, skips);
+
+ if (std::numeric_limits<double>::is_iec559) {
+ VERIFY(deviation == 7650);
+ VERIFY(max == 259);
+ VERIFY(rms == 975);
+ VERIFY(zero == 0);
+ }
+ VERIFY(skips == 1000000);
+ }
+ { // long double, same answers as double
+ int deviation{}, max{}, rms{}, zero{}, skips{};
+ auto rng2{rng};
+ auto rng3{rng};
+ test01<long double>(rng2, rng3, deviation, max, rms, zero, skips);
+
+ if (std::numeric_limits<double>::is_iec559) {
+ VERIFY(deviation == 7650);
+ VERIFY(max == 259);
+ VERIFY(rms == 975);
+ VERIFY(zero == 0);
+ }
+ VERIFY(skips == 1000000);
+ }
+
+ { // local RNG, returns [0..1000000)
+ int deviation{}, max{}, rms{}, zero{}, skips{};
+ local_rng rng2{rng};
+ local_rng rng3{rng};
+ test02<float>(rng2, rng3, deviation, max, rms, zero, skips);
+
+ if (std::numeric_limits<float>::is_iec559)
+ {
+ VERIFY(deviation == 8146);
+ VERIFY(max == 250);
+ VERIFY(rms == 1021);
+ VERIFY(zero == 0);
+ }
+ VERIFY(skips == 18);
+ }
+ return 0;
+}
--
2.51.0