Thanks for the explanation.
During my review, I unexpectedly discovered that this UTF_view, which
accepts an input_range, doesn't consider the fact that the input_iterator
only guarantees that it can be moved. It uses copying in many places.
Perhaps we should only accept forward_range.
Or perhaps we should do nothing, since only libstdc++ authors are using it
anyway.

Tomasz Kaminski <[email protected]> 於 2025年11月15日 週六 上午1:58寫道:

>
>
> On Fri, Nov 14, 2025 at 6:55 PM Hewill Kang <[email protected]> wrote:
>
>> LGTM.
>>
>> I know that CTAD wasn't provided due to heavy header considerations. But
>> wouldn't providing CTAD be an enhancement?
>> At least in terms of user convenience.
>>
> I care much more about the use of libstdc++ compile time performance, that
> convenience of the
> libstdc++ authors (only people that should use it). If we get
> official Utf_view, it should
> have the deduction guide in place.
>
>>
>> Tomasz Kamiński <[email protected]> 於 2025年11月15日 週六 上午1:06寫道:
>>
>>> Previously, _Utf_view accepted any input_range, including
>>> reference-to-array
>>> types like char(&)[2], and stored it as the _M_base member. In such
>>> cases,
>>> _Utf_view was not assignable, failing the requirement to be a view
>>> concept.
>>>
>>> This patch addresses the issue by adding the ranges::view constraint to
>>> the
>>> second template parameter of _Utf_view, and for clarity renaming it from
>>> _Range to _View. The constructor is also adjusted to accept its argument
>>> by value (views must be O(1) move-constructible). This prevents
>>> implicitly
>>> generated CTAD from deducing a reference type.
>>>
>>> This makes _Utf_view consistent with both other standard views and the
>>> wording from P2728R8: Unicode in the Library, Part 1: UTF Transcoding
>>> [1].
>>>
>>> The explicit CTAD from viewable_range is not defined for _Utf_view
>>> because
>>> it depends on views::all_t, views::ref_view, and views::owning_view,
>>> which are declared in <ranges>. Consequently, users must explicitly cast
>>> the argument to a view or specify it as a template parameter.
>>>
>>> [1]
>>> https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2025/p2728r8.html
>>>
>>> libstdc++-v3/ChangeLog:
>>>
>>>         * include/bits/unicode.h (_Utf_view): Rename the template
>>> parameter
>>>         from _Range to _View and constrain it with ranges::view.
>>>         (_Utf_view::_Utf_view): Accept by value instead of rvalue
>>> reference.
>>>         * include/std/format (__format::__write_padded): Replace
>>> _Utf_view
>>>         over const char32_t(&)[1] with span<const char32_t, 1>.
>>>         * testsuite/ext/unicode/view.cc: Add checks if specialization
>>>         of _Utf_view satisfy view. Wrap arrays into std::span before
>>>         cosntructing _Utf_view.
>>> ---
>>>  libstdc++-v3/include/bits/unicode.h        | 15 ++++++++-------
>>>  libstdc++-v3/include/std/format            |  2 +-
>>>  libstdc++-v3/testsuite/ext/unicode/view.cc | 16 ++++++++++------
>>>  3 files changed, 19 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/libstdc++-v3/include/bits/unicode.h
>>> b/libstdc++-v3/include/bits/unicode.h
>>> index 44872db4ed6..09f7c2d7bfb 100644
>>> --- a/libstdc++-v3/include/bits/unicode.h
>>> +++ b/libstdc++-v3/include/bits/unicode.h
>>> @@ -695,13 +695,14 @@ namespace __unicode
>>>         friend class _Utf_iterator;
>>>      };
>>>
>>> -  template<typename _ToFormat, ranges::input_range _Range>
>>> +  template<typename _ToFormat, ranges::input_range _View>
>>> +    requires ranges::view<_View>
>>>      class _Utf_view
>>> -    : public ranges::view_interface<_Utf_view<_ToFormat, _Range>>
>>> +    : public ranges::view_interface<_Utf_view<_ToFormat, _View>>
>>>      {
>>> -      using _Iterator = _Utf_iterator<ranges::range_value_t<_Range>,
>>> -                                     _ToFormat,
>>> ranges::iterator_t<_Range>,
>>> -                                     ranges::sentinel_t<_Range>>;
>>> +      using _Iterator = _Utf_iterator<ranges::range_value_t<_View>,
>>> +                                     _ToFormat,
>>> ranges::iterator_t<_View>,
>>> +                                     ranges::sentinel_t<_View>>;
>>>
>>>        template<typename _Iter, typename _Sent>
>>>         constexpr auto
>>> @@ -725,11 +726,11 @@ namespace __unicode
>>>             return _Iterator(__last, __last);
>>>         }
>>>
>>> -      _Range _M_base;
>>> +      _View _M_base;
>>>
>>>      public:
>>>        constexpr explicit
>>> -      _Utf_view(_Range&& __r) : _M_base(std::forward<_Range>(__r)) { }
>>> +      _Utf_view(_View __r) : _M_base(std::move(__r)) { }
>>>
>>>        constexpr auto begin()
>>>        { return _M_begin(ranges::begin(_M_base), ranges::end(_M_base)); }
>>> diff --git a/libstdc++-v3/include/std/format
>>> b/libstdc++-v3/include/std/format
>>> index 1102ac8f6e8..f64f35a202e 100644
>>> --- a/libstdc++-v3/include/std/format
>>> +++ b/libstdc++-v3/include/std/format
>>> @@ -845,7 +845,7 @@ namespace __format
>>>           {
>>>             // Encode fill char as multiple code units of type _CharT.
>>>             const char32_t __arr[1]{ __fill_char };
>>> -           _Utf_view<_CharT, const char32_t(&)[1]> __v(__arr);
>>> +           _Utf_view<_CharT, span<const char32_t, 1>> __v(__arr);
>>>             basic_string<_CharT> __padstr(__v.begin(), __v.end());
>>>             __padding = __padstr;
>>>             while (__l-- > 0)
>>> diff --git a/libstdc++-v3/testsuite/ext/unicode/view.cc
>>> b/libstdc++-v3/testsuite/ext/unicode/view.cc
>>> index 40c8fcf34fb..677a21d8c1f 100644
>>> --- a/libstdc++-v3/testsuite/ext/unicode/view.cc
>>> +++ b/libstdc++-v3/testsuite/ext/unicode/view.cc
>>> @@ -7,6 +7,10 @@
>>>  namespace uc = std::__unicode;
>>>  using namespace std::string_view_literals;
>>>
>>> +static_assert( std::ranges::view<uc::_Utf8_view<std::string_view>> );
>>> +static_assert( std::ranges::view<uc::_Utf16_view<std::string_view>> );
>>> +static_assert( std::ranges::view<uc::_Utf32_view<std::string_view>> );
>>> +
>>>  template<std::ranges::range View>
>>>  constexpr void
>>>  compare(View v,
>>> std::basic_string_view<std::ranges::range_value_t<View>> s)
>>> @@ -87,18 +91,18 @@ test_illformed_utf16()
>>>    compare(uc::_Utf16_view(s.substr(0, 1)), r);
>>>    compare(uc::_Utf16_view(s.substr(1, 1)), r);
>>>    std::array s2{ s[0], s[0] };
>>> -  compare(uc::_Utf16_view(s2), u"\uFFFD\uFFFD"sv);
>>> +  compare(uc::_Utf16_view(std::span(s2)), u"\uFFFD\uFFFD"sv);
>>>    std::array s3{ s[0], s[0], s[1] };
>>> -  compare(uc::_Utf16_view(s3), u"\uFFFD\N{CLOWN FACE}"sv);
>>> +  compare(uc::_Utf16_view(std::span(s3)), u"\uFFFD\N{CLOWN FACE}"sv);
>>>    std::array s4{ s[1], s[0] };
>>> -  compare(uc::_Utf16_view(s4), u"\uFFFD\uFFFD"sv);
>>> +  compare(uc::_Utf16_view(std::span(s4)), u"\uFFFD\uFFFD"sv);
>>>    std::array s5{ s[1], s[0], s[1] };
>>> -  compare(uc::_Utf16_view(s5), u"\uFFFD\N{CLOWN FACE}"sv);
>>> +  compare(uc::_Utf16_view(std::span(s5)), u"\uFFFD\N{CLOWN FACE}"sv);
>>>
>>>    std::array<char16_t, 2> s6{ 0xDC00, 0xDC01 };
>>> -  compare(uc::_Utf16_view(s6), u"\uFFFD\uFFFD"sv);
>>> +  compare(uc::_Utf16_view(std::span(s6)), u"\uFFFD\uFFFD"sv);
>>>    std::array<char16_t, 2> s7{ 0xD7FF, 0xDC00 };
>>> -  compare(uc::_Utf16_view(s7), u"\uD7FF\uFFFD"sv);
>>> +  compare(uc::_Utf16_view(std::span(s7)), u"\uD7FF\uFFFD"sv);
>>>  }
>>>
>>>  constexpr void
>>> --
>>> 2.51.0
>>>
>>>

Reply via email to