On Thu, 3 Apr 2025 at 09:55, Tomasz Kamiński <[email protected]> wrote:
>
> This patch corrects handling of UTF-32LE and UTF-32BE in
> __unicode::__literal_encoding_is_unicode<_CharT>, so that they are
> recognized as Unicode and the function produces correct results for wchar_t.
>
> Use `__unicode::__field_width` to compute the estimated witdh
Typo: "width"
> of the charcter for unicode wide encoding.
Typo: "character"
>
> PR libstdc++-v3/119593
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/unicode.h
> (__unicode::__literal_encoding_is_unicode<_CharT>):
> Corrected handling for UTF-16 and UTF-32 with "LE" or "BE" suffix.
> * include/std/format (__formatter_str::_S_character_width):
> Define.
> (__formatter_str::_S_character_width): Updated passed char
> length.
> * testsuite/std/format/functions/format.cc: Test for wchar_t.
> ---
> Testing on x86_64-linux. OK for trunk?
> I believe we should backport it, given that all wchar_t uses are
> impacted.
>
> libstdc++-v3/include/bits/unicode.h | 2 ++
> libstdc++-v3/include/std/format | 15 ++++++++++++++-
> .../testsuite/std/format/functions/format.cc | 8 ++++++--
> 3 files changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/unicode.h
> b/libstdc++-v3/include/bits/unicode.h
> index 24b1ac3d53d..99d972eccff 100644
> --- a/libstdc++-v3/include/bits/unicode.h
> +++ b/libstdc++-v3/include/bits/unicode.h
> @@ -1039,6 +1039,8 @@ inline namespace __v16_0_0
> string_view __s(__enc);
> if (__s.ends_with("//"))
> __s.remove_suffix(2);
> + if (__s.ends_with("LE") || __s.ends_with("BE"))
> + __s.remove_suffix(2);
> return __s == "16" || __s == "32";
> }
> }
> diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
> index c3327e1d384..603facc51de 100644
> --- a/libstdc++-v3/include/std/format
> +++ b/libstdc++-v3/include/std/format
> @@ -1277,12 +1277,25 @@ namespace __format
> _M_spec);
> }
>
Please put [[__gnu__::__always_inline__]] on this function, so that it
doesn't add any overhead for narrow chars:
> + static size_t
> + _S_character_width(_CharT __c)
> + {
> + using __unicode::__literal_encoding_is_unicode;
> + // N.B. a single byte cannot encode a character of width greater than 1
> + if (sizeof(_CharT) > 1u && __literal_encoding_is_unicode<_CharT>())
I think this can be 'if constexpr'
OK for trunk and gcc-14 with those changes, thanks.
(No backport for gcc-13 because it doesn't have the Unicode-aware
field width support.)
> + return __unicode::__field_width(__c);
> + else
> + return 1u;
> + }
> +
> template<typename _Out>
> typename basic_format_context<_Out, _CharT>::iterator
> _M_format_character(_CharT __c,
> basic_format_context<_Out, _CharT>& __fc) const
> {
> - return __format::__write_padded_as_spec({&__c, 1u}, 1, __fc,
> _M_spec);
> + return __format::__write_padded_as_spec({&__c, 1u},
> + _S_character_width(__c),
> + __fc, _M_spec);
> }
>
> template<typename _Int>
> diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc
> b/libstdc++-v3/testsuite/std/format/functions/format.cc
> index 7fc42017045..d8dbf463413 100644
> --- a/libstdc++-v3/testsuite/std/format/functions/format.cc
> +++ b/libstdc++-v3/testsuite/std/format/functions/format.cc
> @@ -501,9 +501,14 @@ test_unicode()
> {
> // Similar to sC example in test_std_examples, but not from the standard.
> // Verify that the character "🤡" has estimated field width 2,
> - // rather than estimated field width equal to strlen("🤡"), which would be
> 4.
> + // rather than estimated field width equal to strlen("🤡"), which would be
> 4,
> + // or just width 1 for single character.
> std::string sC = std::format("{:*<3}", "🤡");
> VERIFY( sC == "🤡*" );
> + std::wstring wsC = std::format(L"{:*<3}", L"🤡");
> + VERIFY( wsC == L"🤡*" );
> + wsC = std::format(L"{:*<3}", L'🤡');
> + VERIFY( wsC == L"🤡*" );
>
> // Verify that "£" has estimated field width 1, not strlen("£") == 2.
> std::string sL = std::format("{:*<3}", "£");
> @@ -517,7 +522,6 @@ test_unicode()
> std::string sP = std::format("{:1.1} {:*<1.1}", "£", "🤡");
> VERIFY( sP == "£ *" );
> sP = std::format("{:*<2.1} {:*<2.1}", "£", "🤡");
> - VERIFY( sP == "£* **" );
>
> // Verify field width handling for extended grapheme clusters,
> // and that a cluster gets output as a single item, not truncated.
> --
> 2.48.1
>