A few places in bits/basic_string.h use `traits_type::copy` to copy `__str.length() + 1` bytes.
Despite the knowledge that `__str.length()` is not greater than 15 the compiler emits (and sometimes inlines) a `memcpy` call. That results in a quite big set of instructions https://godbolt.org/z/j35MMfxzq Replacing `__str.length() + 1` with `_S_local_capacity + 1` explicitly forces the compiler to copy the whole `__str._M_local_buf`. As a result the assembly becomes almost 5 times shorter and without any function calls or multiple conditional jumps https://godbolt.org/z/bfq8bxra9 This patch always copies `_S_local_capacity + 1` if working with `std::char_traits<char>`. PR libstdc++/112682: * include/bits/basic_string.h: Optimize string moves. P.S.: still not sure that this optimization is not an UB or fine for libstdc++. However, the assembly looks much better with it. -- Best regards, Antony Polukhin
diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h index 1b8ebca7dad..7a5e348280c 100644 --- a/libstdc++-v3/include/bits/basic_string.h +++ b/libstdc++-v3/include/bits/basic_string.h @@ -188,6 +188,23 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 : basic_string(__svw._M_sv.data(), __svw._M_sv.size(), __a) { } #endif + _GLIBCXX17_CONSTEXPR + static bool + _S_permit_copying_indeterminate() noexcept + { + // Copying compile-time known _S_local_capacity + 1 bytes is much more + // efficient than copying runtime known __str.length() + 1. This + // function returns true, if such initialization is permitted even if + // the right side has indeterminate values. + // + // [dcl.init] permits initializing with indeterminate value of unsigned + // narrow character type. + // + // Library users should not specialize char_traits<char> so this is + // not observable for user. + return is_same<traits_type, char_traits<char> >::value; + } + // Use empty-base optimization: http://www.cantrip.org/emptyopt.html struct _Alloc_hider : allocator_type // TODO check __is_final { @@ -672,8 +689,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 { if (__str._M_is_local()) { - traits_type::copy(_M_local_buf, __str._M_local_buf, - __str.length() + 1); + size_type __copy_count = _S_local_capacity + 1; + if _GLIBCXX17_CONSTEXPR (!_S_permit_copying_indeterminate()) + __copy_count = __str.length() + 1; + traits_type::copy(_M_local_buf, __str._M_local_buf, __copy_count); } else { @@ -711,8 +730,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 { if (__str._M_is_local()) { - traits_type::copy(_M_local_buf, __str._M_local_buf, - __str.length() + 1); + size_type __copy_count = _S_local_capacity + 1; + if _GLIBCXX17_CONSTEXPR (!_S_permit_copying_indeterminate()) + __copy_count = __str.length() + 1; + traits_type::copy(_M_local_buf, __str._M_local_buf, __copy_count); _M_length(__str.length()); __str._M_set_length(0); }