On Mon, 16 Feb 2026 at 11:42, Tomasz Kamiński <[email protected]> wrote:
>
> This paper implement the changes from P2438R2 basic_string::substr() &&
> paper into C++26. The additional substr and constructor overload are
> implemented only for SSO string, as they require mutating the content
> (if reused), and thus would require copy of the string anyway for COW
> strings. (In consequence allocators implicitly constructible from int
> remain ambiguous for C++11 ABI strings, see r16-7497-gfa1149534d8580).
>
> In addition to cases when the allocators are not compatible (equal),
> this patch does not reuse (transfer) allocator storage, if the selected
> substring fits inside the SSO buffer, so we do not risk keeping large
> chunk of memory for few characters. (This also covers cases when the
> source stored the content in the local buffer).
>
> As this additional overloads are meant to be optimization, in contrast
> to move constructor, the source is left unmodified if the allocation
> is not transferred. This avoid introducing a write (of null terminator)
> to previously untouched, heap allocated, memory.
>
> Separate overloads for and substr(size_type __pos, size_type __n)
Was the word "and" meant to be between substr(...) and substr(...)?
> substr(size_type __pos == 0), that delegate to corresponding constructor,
> are provided to avoid the check of __n against the length() in the later
> case.
>
> Finally, the signatures of existing substr() overload are not modified
> (no longer required since C++20), which avoid any impact on the ABI.
It looks like Clang supports overloading `f() const` and `f()&&` since
version 18.0.0 so this works for the last four clang releases.
>
> PR libstdc++/119745
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/basic_string.h (basic_string::_M_construct)
> [__cplusplus >= 202302L]: Declare.
> (basic_string::basic_string(basic_string&&, size_type, const _Alloc&))
> (basic_string(basic_string&&, size_type, size_type, const _Alloc&))
> (basic_string::substr(size_type, size_type) &&)
> ((basic_string::substr(size_type) &&) [__cplusplus >= 202302L]:
> Define.
> * include/bits/basic_string.tcc (basic_string::_M_construct)
> [__cplusplus >= 202302L]: Define.
> * testsuite/21_strings/basic_string/operations/substr/rvalue.cc: New
> test.
> ---
> Testint on x86-64-linux. Locally all test passed, and the
> substr/rvalue.cc with all standard modes, -m32, cxx11 and debug passed.
>
> OK for trunk when all tests passes?
>
> libstdc++-v3/include/bits/basic_string.h | 37 ++
> libstdc++-v3/include/bits/basic_string.tcc | 32 ++
> .../basic_string/operations/substr/rvalue.cc | 334 ++++++++++++++++++
> 3 files changed, 403 insertions(+)
> create mode 100644
> libstdc++-v3/testsuite/21_strings/basic_string/operations/substr/rvalue.cc
>
> diff --git a/libstdc++-v3/include/bits/basic_string.h
> b/libstdc++-v3/include/bits/basic_string.h
> index cd6f312f1bd..04377706cc2 100644
> --- a/libstdc++-v3/include/bits/basic_string.h
> +++ b/libstdc++-v3/include/bits/basic_string.h
> @@ -354,6 +354,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> void
> _M_construct(const _CharT *__c, size_type __n);
>
> +#if __cplusplus >= 202302L
> + constexpr void
> + _M_construct(basic_string&& __str, size_type __pos, size_type __n);
> +#endif
> +
> _GLIBCXX20_CONSTEXPR
> allocator_type&
> _M_get_allocator()
> @@ -671,6 +676,26 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> std::forward_iterator_tag());
> }
>
> +#if __cplusplus >= 202302L
> + _GLIBCXX20_CONSTEXPR
> + basic_string(basic_string&& __str, size_type __pos,
> + const _Alloc& __a = _Alloc())
> + : _M_dataplus(_M_local_data(), __a)
> + {
> + __pos = __str._M_check(__pos, "string::string");
> + _M_construct(std::move(__str), __pos, __str.length() - __pos);
> + }
> +
> + _GLIBCXX20_CONSTEXPR
> + basic_string(basic_string&& __str, size_type __pos, size_type __n,
> + const _Alloc& __a = _Alloc())
> + : _M_dataplus(_M_local_data(), __a)
> + {
> + __pos = __str._M_check(__pos, "string::string");
> + _M_construct(std::move(__str), __pos, __str._M_limit(__pos, __n));
> + }
> +#endif
> +
> /**
> * @brief Construct string initialized by a character %array.
> * @param __s Source character %array.
> @@ -3442,6 +3467,18 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
> { return basic_string(*this,
> _M_check(__pos, "basic_string::substr"), __n); }
>
> +#if __cplusplus >= 202302L
> + _GLIBCXX_NODISCARD
> + constexpr basic_string
> + substr(size_type __pos = 0) &&
> + { return basic_string(std::move(*this), __pos); }
> +
Trailing whitespace on the line above.
> + _GLIBCXX_NODISCARD
> + constexpr basic_string
> + substr(size_type __pos, size_type __n) &&
> + { return basic_string(std::move(*this), __pos, __n); }
> +#endif // __cplusplus
> +
> #ifdef __glibcxx_string_subview // >= C++26
> /**
> * @brief Get a subview.
> diff --git a/libstdc++-v3/include/bits/basic_string.tcc
> b/libstdc++-v3/include/bits/basic_string.tcc
> index 75ffea42ced..ab5b06d86f8 100644
> --- a/libstdc++-v3/include/bits/basic_string.tcc
> +++ b/libstdc++-v3/include/bits/basic_string.tcc
> @@ -302,6 +302,38 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> traits_type::assign(_M_data()[__n], _CharT());
> }
>
> +#if __cplusplus >= 202302L
> + template<typename _CharT, typename _Traits, typename _Alloc>
> + constexpr void
> + basic_string<_CharT, _Traits, _Alloc>::
> + _M_construct(basic_string&& __str, size_type __pos, size_type __n)
> + {
> + const _CharT* __start = __str._M_data() + __pos;
> + if (__n < _S_local_capacity)
> + {
> + _M_init_local_buf();
> + traits_type::copy(_M_local_buf, __start, __n);
> + _M_set_length(__n);
> + return;
> + }
> +
> + if constexpr (!allocator_traits<_Alloc>::is_always_equal::value)
> + if (get_allocator() != __str.get_allocator())
> + {
> + _M_construct<false>(__start, __n);
The line above seems to use spaces before the first tab.
> + return;
> + }
> +
> + _M_data(__str._M_data());
> + _M_capacity(__str._M_allocated_capacity);
> + __str._M_data(__str._M_use_local_data());
> + __str._M_set_length(0);
> +
> + _S_move(_M_data(), _M_data() + __pos, __n);
> + _M_set_length(__n);
> + }
> +#endif
> +
> template<typename _CharT, typename _Traits, typename _Alloc>
> _GLIBCXX20_CONSTEXPR
> void
> diff --git
> a/libstdc++-v3/testsuite/21_strings/basic_string/operations/substr/rvalue.cc
> b/libstdc++-v3/testsuite/21_strings/basic_string/operations/substr/rvalue.cc
> new file mode 100644
> index 00000000000..1c8d9f7aeb0
> --- /dev/null
> +++
> b/libstdc++-v3/testsuite/21_strings/basic_string/operations/substr/rvalue.cc
> @@ -0,0 +1,334 @@
> +// { dg-do run { target c++26 } }
> +
> +#include <string_view>
> +#include <testsuite_hooks.h>
> +#include <testsuite_allocator.h>
> +
> +// When selection is short enought to fit into SSO, the rhs
"enough"
> +// is left unchanged.
> +template<typename CharT, typename Allocator>
> +constexpr void
> +do_test_short_in(const CharT* cstr, Allocator alloc)
> +{
> + using StringView = std::basic_string_view<CharT>;
> + using String = std::basic_string<CharT, std::char_traits<CharT>,
> Allocator>;
> +
> + const Allocator defAlloc;
> + String src, res;
> +
> + // substr(0)
> + src = String(cstr, alloc);
> + res = std::move(src).substr(0);
> + VERIFY( res == StringView(cstr).substr(0) );
> + VERIFY( res.get_allocator() == defAlloc );
> + VERIFY( src == cstr );
> +
> + src = String(cstr, alloc);
> + String res1(std::move(src), 0);
I see this test FAIL for the COW string because of a warning on this
line (and on other lines later):
rvalue.cc:27: warning: ISO C++ says that these are ambiguous, even
though the worst conversion for the first is better than the worst
conversion for the second:
> + VERIFY( res1 == StringView(cstr).substr(0) );
> + VERIFY( res1.get_allocator() == defAlloc );
> + VERIFY( src == cstr );
> +
> + src = String(cstr, alloc);
> + String res2(std::move(src), 0, alloc);
> + VERIFY( res2 == StringView(cstr).substr(0) );
> + VERIFY( res2.get_allocator() == alloc );
> + VERIFY( src == cstr );
> +
> + // substr(1)
> + src = String(cstr, alloc);
> + res = std::move(src).substr(1);
> + VERIFY( res == StringView(cstr).substr(1) );
> + VERIFY( res.get_allocator() == defAlloc );
> + VERIFY( src == cstr );
> +
> + src = String(cstr, alloc);
> + String res3(std::move(src), 1);
> + VERIFY( res3 == StringView(cstr).substr(1) );
> + VERIFY( res3.get_allocator() == defAlloc );
> + VERIFY( src == cstr );
> +
> + src = String(cstr, alloc);
> + String res4(std::move(src), 1, alloc);
> + VERIFY( res4 == StringView(cstr).substr(1) );
> + VERIFY( res4.get_allocator() == alloc );
> + VERIFY( src == cstr );
> +
> + // substr(1, 1000)
> + src = String(cstr, alloc);
> + res = std::move(src).substr(1, 1000);
> + VERIFY( res == StringView(cstr).substr(1, 1000) );
> + VERIFY( res.get_allocator() == defAlloc );
> + VERIFY( src == cstr );
> +
> + src = String(cstr, alloc);
> + String res5(std::move(src), 1, 1000);
> + VERIFY( res5 == StringView(cstr).substr(1, 1000) );
> + VERIFY( res5.get_allocator() == defAlloc );
> + VERIFY( src == cstr );
> +
> + src = String(cstr, alloc);
> + String res6(std::move(src), 1, 1000, alloc);
> + VERIFY( res6 == StringView(cstr).substr(1, 1000) );
> + VERIFY( res6.get_allocator() == alloc );
> + VERIFY( src == cstr );
> +}
> +
> +template<typename CharT, typename Allocator>
> +constexpr void
> +do_test_short_sel(const CharT* cstr, Allocator alloc)
> +{
> + using StringView = std::basic_string_view<CharT>;
> + using String = std::basic_string<CharT, std::char_traits<CharT>,
> Allocator>;
> +
> + const Allocator defAlloc;
> + String src, res;
> +
> + // substr(0, 2)
> + src = String(cstr, alloc);
> + res = std::move(src).substr(0, 2);
> + VERIFY( res == StringView(cstr).substr(0, 2) );
> + VERIFY( res.get_allocator() == defAlloc );
> + VERIFY( src == cstr );
> +
> + src = String(cstr, alloc);
> + String res1(std::move(src), 0, 2);
> + VERIFY( res1 == StringView(cstr).substr(0, 2) );
> + VERIFY( res1.get_allocator() == defAlloc );
> + VERIFY( src == cstr );
> +
> + src = String(cstr, alloc);
> + String res2(std::move(src), 0, 2, alloc);
> + VERIFY( res2 == StringView(cstr).substr(0, 2) );
> + VERIFY( res2.get_allocator() == alloc );
> + VERIFY( src == cstr );
> +
> + // substr(1, 2)
> + src = String(cstr, alloc);
> + res = std::move(src).substr(1, 2);
> + VERIFY( res == StringView(cstr).substr(1, 2) );
> + VERIFY( res.get_allocator() == defAlloc );
> + VERIFY( src == cstr );
> +
> + src = String(cstr, alloc);
> + String res3(std::move(src), 1, 2);
> + VERIFY( res3 == StringView(cstr).substr(1, 2) );
> + VERIFY( res3.get_allocator() == defAlloc );
> + VERIFY( src == cstr );
> +
> + src = String(cstr, alloc);
> + String res4(std::move(src), 1, 2, alloc);
> + VERIFY( res4 == StringView(cstr).substr(1, 2) );
> + VERIFY( res4.get_allocator() == alloc );
> + VERIFY( src == cstr );
> +}
> +
> +// If the selection is long enugh to do not fit into SSO,
"enough"
> +// the memory is moved if allocators are compatible.
> +template<typename CharT, typename Allocator>
> +constexpr void
> +do_test_long(const CharT* cstr, Allocator alloc)
> +{
> + using StringView = std::basic_string_view<CharT>;
> + using String = std::basic_string<CharT, std::char_traits<CharT>,
> Allocator>;
> +
> + const Allocator defAlloc;
> + String src, res;
> + const CharT* data;
> +
> + auto verifyMove = [&](const String& str)
> + {
> + // Only SSO string provide rvalue overloads of
> + // substr and corresponding constructor.
> +#if _GLIBCXX_USE_CXX11_ABI || !_GLIBCXX_USE_DUAL_ABI
> + if (str.get_allocator() == alloc) {
> + VERIFY( str.data() == data );
> + VERIFY( src.empty() );
> + } else
> +#endif
> + VERIFY( src == cstr );
> + };
> +
> + // substr(0)
> + src = String(cstr, alloc);
> + data = src.data();
> + res = std::move(src).substr(0);
> + VERIFY( res == StringView(cstr).substr(0) );
> + VERIFY( res.get_allocator() == defAlloc );
> + verifyMove(res);
> +
> + src = String(cstr, alloc);
> + data = src.data();
> + String res1(std::move(src), 0);
> + VERIFY( res1 == StringView(cstr).substr(0) );
> + VERIFY( res1.get_allocator() == defAlloc );
> + verifyMove(res1);
> +
> + src = String(cstr, alloc);
> + data = src.data();
> + String res2(std::move(src), 0, alloc);
> + VERIFY( res2 == StringView(cstr).substr(0) );
> + VERIFY( res2.get_allocator() == alloc );
> + verifyMove(res2);
> +
> + // substr(5)
> + src = String(cstr, alloc);
> + data = src.data();
> + res = std::move(src).substr(5);
> + VERIFY( res == StringView(cstr).substr(5) );
> + VERIFY( res.get_allocator() == defAlloc );
> + verifyMove(res);
> +
> + src = String(cstr, alloc);
> + data = src.data();
> + String res3(std::move(src), 5);
> + VERIFY( res3 == StringView(cstr).substr(5) );
> + VERIFY( res3.get_allocator() == defAlloc );
> + verifyMove(res3);
> +
> + src = String(cstr, alloc);
> + data = src.data();
> + String res4(std::move(src), 5, alloc);
> + VERIFY( res4 == StringView(cstr).substr(5) );
> + verifyMove(res4);
> +
> + // substr(0, 50)
> + src = String(cstr, alloc);
> + data = src.data();
> + res = std::move(src).substr(0, 50);
> + VERIFY( res == StringView(cstr).substr(0, 50) );
> + VERIFY( res.get_allocator() == defAlloc );
> + verifyMove(res);
> +
> + src = String(cstr, alloc);
> + data = src.data();
> + String res5(std::move(src), 0, 50);
> + VERIFY( res5 == StringView(cstr).substr(0, 50) );
> + VERIFY( res5.get_allocator() == defAlloc );
> + verifyMove(res5);
> +
> + src = String(cstr, alloc);
> + data = src.data();
> + String res6(std::move(src), 0, 50, alloc);
> + VERIFY( res6 == StringView(cstr).substr(0, 50) );
> + VERIFY( res6.get_allocator() == alloc );
> + verifyMove(res6);
> +
> + // substr(5, 50)
> + src = String(cstr, alloc);
> + data = src.data();
> + res = std::move(src).substr(5, 50);
> + VERIFY( res == StringView(cstr).substr(5, 50) );
> + VERIFY( res.get_allocator() == defAlloc );
> + verifyMove(res);
> +
> + src = String(cstr, alloc);
> + data = src.data();
> + String res7(std::move(src), 5, 50);
> + VERIFY( res7 == StringView(cstr).substr(5, 50) );
> + VERIFY( res7.get_allocator() == defAlloc );
> + verifyMove(res7);
> +
> + src = String(cstr, alloc);
> + data = src.data();
> + String res8(std::move(src), 5, 50, alloc);
> + VERIFY( res8 == StringView(cstr).substr(5, 50) );
> + VERIFY( res8.get_allocator() == alloc );
> + verifyMove(res8);
> +
> + // substr(5, 100)
> + src = String(cstr, alloc);
> + data = src.data();
> + res = std::move(src).substr(5, 1000);
> + VERIFY( res == StringView(cstr).substr(5, 1000) );
> + VERIFY( res.get_allocator() == defAlloc );
> + verifyMove(res);
> +
> + src = String(cstr, alloc);
> + data = src.data();
> + String res9(std::move(src), 5, 1000);
> + VERIFY( res9 == StringView(cstr).substr(5, 1000) );
> + VERIFY( res9.get_allocator() == defAlloc );
> + verifyMove(res9);
> +
> + src = String(cstr, alloc);
> + data = src.data();
> + String res10(std::move(src), 5, 1000, alloc);
> + VERIFY( res10 == StringView(cstr).substr(5, 1000) );
> + VERIFY( res10.get_allocator() == alloc );
> + verifyMove(res10);
> +}
> +
> +template<typename CharT, typename Allocator>
> +constexpr void
> +do_test_alloc(const CharT* sht, const CharT* lng, Allocator alloc)
> +{
> + do_test_short_in(sht, alloc);
> + do_test_short_sel(sht, alloc);
> + do_test_short_sel(lng, alloc);
> + do_test_long(lng, alloc);
> +}
> +
> +template<typename CharT>
> +constexpr void
> +do_test_bounds(const CharT* cstr)
> +{
> + using String = std::basic_string<CharT>;
> + try
> + {
> + auto res = String(cstr).substr(10);
> + VERIFY(false);
> + } catch (const std::out_of_range&) {
> + VERIFY(true);
> + }
> +
> + try
> + {
> + auto res = String(cstr).substr(10, 20);
> + VERIFY(false);
> + } catch (const std::out_of_range&) {
> + VERIFY(true);
> + }
> +
> + try
> + {
> + String res(String(cstr), 10);
> + VERIFY(false);
> + } catch (const std::out_of_range&) {
> + VERIFY(true);
> + }
> +
> + try
> + {
> + String res(String(cstr), 10, 20);
> + VERIFY(false);
> + } catch (const std::out_of_range&) {
> + VERIFY(true);
> + }
> +}
> +
> +template<typename CharT>
> +constexpr void
> +do_test(const CharT* sht, const CharT* lng)
> +{
> + do_test_alloc(sht, lng, std::allocator<CharT>());
> + if ! consteval {
> + do_test_alloc(sht, lng, __gnu_test::uneq_allocator<CharT>());
> + do_test_alloc(sht, lng, __gnu_test::uneq_allocator<CharT>(42));
> + do_test_bounds(sht);
> + }
> +}
> +
> +constexpr bool
> +test_all()
> +{
> + do_test("abcd", "some very very long string, that will not use SSO, and
> have at least fifty characters");
> + do_test(L"ab", L"some very very long string, that will not use SSO, and
> have at least fifty characters");
> +
> + return true;
> +}
> +
> +int main()
> +{
> + test_all();
> +}
> --
> 2.53.0
>