This patch corrects handling of UTF-32LE and UTF32-BE in
__unicode::__literal_encoding_is_unicode<_CharT>, so they are
recognized as unicode and functions produces correct result for wchar_t.
Use `__unicode::__field_width` to compute the estimated witdh
of the charcter for unicode wide encoding.
PR libstdc++-v3/119593
libstdc++-v3/ChangeLog:
* include/bits/unicode.h
(__unicode::__literal_encoding_is_unicode<_CharT>):
Corrected handing for UTF-16 and UTF-32 with "LE" or "BE" suffix.
* include/std/format (__formatter_str::_S_character_width):
Define.
(__formatter_str::_S_character_width): Updated passed char
length.
* testsuite/std/format/functions/format.cc: Test for wchar_t.
---
Testing on x86_64-linux. OK for trunk?
I believe we should backport it, given that all wchar_t uses are
impacted.
libstdc++-v3/include/bits/unicode.h | 2 ++
libstdc++-v3/include/std/format | 15 ++++++++++++++-
.../testsuite/std/format/functions/format.cc | 8 ++++++--
3 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/libstdc++-v3/include/bits/unicode.h
b/libstdc++-v3/include/bits/unicode.h
index 24b1ac3d53d..99d972eccff 100644
--- a/libstdc++-v3/include/bits/unicode.h
+++ b/libstdc++-v3/include/bits/unicode.h
@@ -1039,6 +1039,8 @@ inline namespace __v16_0_0
string_view __s(__enc);
if (__s.ends_with("//"))
__s.remove_suffix(2);
+ if (__s.ends_with("LE") || __s.ends_with("BE"))
+ __s.remove_suffix(2);
return __s == "16" || __s == "32";
}
}
diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format
index c3327e1d384..603facc51de 100644
--- a/libstdc++-v3/include/std/format
+++ b/libstdc++-v3/include/std/format
@@ -1277,12 +1277,25 @@ namespace __format
_M_spec);
}
+ static size_t
+ _S_character_width(_CharT __c)
+ {
+ using __unicode::__literal_encoding_is_unicode;
+ // N.B. single byte cannot encode charcter of width greater than 1
+ if (sizeof(_CharT) > 1u && __literal_encoding_is_unicode<_CharT>())
+ return __unicode::__field_width(__c);
+ else
+ return 1u;
+ }
+
template<typename _Out>
typename basic_format_context<_Out, _CharT>::iterator
_M_format_character(_CharT __c,
basic_format_context<_Out, _CharT>& __fc) const
{
- return __format::__write_padded_as_spec({&__c, 1u}, 1, __fc, _M_spec);
+ return __format::__write_padded_as_spec({&__c, 1u},
+ _S_character_width(__c),
+ __fc, _M_spec);
}
template<typename _Int>
diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc
b/libstdc++-v3/testsuite/std/format/functions/format.cc
index 7fc42017045..d8dbf463413 100644
--- a/libstdc++-v3/testsuite/std/format/functions/format.cc
+++ b/libstdc++-v3/testsuite/std/format/functions/format.cc
@@ -501,9 +501,14 @@ test_unicode()
{
// Similar to sC example in test_std_examples, but not from the standard.
// Verify that the character "🤡" has estimated field width 2,
- // rather than estimated field width equal to strlen("🤡"), which would be 4.
+ // rather than estimated field width equal to strlen("🤡"), which would be 4,
+ // or just width 1 for single character.
std::string sC = std::format("{:*<3}", "🤡");
VERIFY( sC == "🤡*" );
+ std::wstring wsC = std::format(L"{:*<3}", L"🤡");
+ VERIFY( wsC == L"🤡*" );
+ wsC = std::format(L"{:*<3}", L'🤡');
+ VERIFY( wsC == L"🤡*" );
// Verify that "£" has estimated field width 1, not strlen("£") == 2.
std::string sL = std::format("{:*<3}", "£");
@@ -517,7 +522,6 @@ test_unicode()
std::string sP = std::format("{:1.1} {:*<1.1}", "£", "🤡");
VERIFY( sP == "£ *" );
sP = std::format("{:*<2.1} {:*<2.1}", "£", "🤡");
- VERIFY( sP == "£* **" );
// Verify field width handling for extended grapheme clusters,
// and that a cluster gets output as a single item, not truncated.
--
2.48.1