Package: libstdc++6 Version: 4.1.1-5 Severity: important The attached source file (UTF-8 encoded) demonstrates that codecvt is broken for the simplest of transformations (UTF-8 to UCS-4). This is pretty basic, and the underlying gconf stuff works correctly, so the bug is either in libstdc++6 or somewhere inline in the headers.
$ ./wide wide: ../iconv/loop.c:425: utf8_internal_loop_single: Assertion `inptr - bytebuf > (state->__count & 7)' failed. Aborted While running: (gdb) bt #0 0x0fcc672c in __gconv_transform_utf8_internal () from /lib/tls/libc.so.6 #1 0x0fe0425c in ?? () from /lib/tls/libc.so.6 #2 0x0ffa6ef8 in std::codecvt<wchar_t, char, __mbstate_t>::do_in () from /usr/lib/libstdc++.so.6 #3 0x100016b4 in std::__codecvt_abstract_base<wchar_t, char, __mbstate_t>::in (this=0x100290b8, [EMAIL PROTECTED], __from=0x10013014 "[EMAIL PROTECTED]", __from_end=0x1001301d "", [EMAIL PROTECTED], __to=0x7fa405bc, __to_end=0x7fa406fc, [EMAIL PROTECTED]) at /usr/lib/gcc/powerpc-linux-gnu/4.1.2/../../../../include/c++/4.1.2/bits/codecvt.h:204 #4 0x10001244 in to_wide_string ([EMAIL PROTECTED], [EMAIL PROTECTED]) at wide.cc:22 #5 0x10001544 in main () at wide.cc:59 After aborting: wide: ../iconv/loop.c:425: utf8_internal_loop_single: Assertion `inptr - bytebuf > (state->__count & 7)' failed. Program received signal SIGABRT, Aborted. 0x0fcd67bc in raise () from /lib/tls/libc.so.6 (gdb) bt #0 0x0fcd67bc in raise () from /lib/tls/libc.so.6 #1 0x0fcd82c0 in abort () from /lib/tls/libc.so.6 #2 0x0fcce768 in __assert_fail () from /lib/tls/libc.so.6 #3 0x0fcc6c7c in __gconv_transform_utf8_internal () from /lib/tls/libc.so.6 #4 0x0fcc6c7c in __gconv_transform_utf8_internal () from /lib/tls/libc.so.6 #5 0x0fcc6c7c in __gconv_transform_utf8_internal () from /lib/tls/libc.so.6 #6 0x0fcc6c7c in __gconv_transform_utf8_internal () from /lib/tls/libc.so.6 #7 0x0fcc6c7c in __gconv_transform_utf8_internal () from /lib/tls/libc.so.6 #8 0x0fcc6c7c in __gconv_transform_utf8_internal () from /lib/tls/libc.so.6 #9 0x0fcc6c7c in __gconv_transform_utf8_internal () from /lib/tls/libc.so.6 #10 0x0fcc6c7c in __gconv_transform_utf8_internal () from /lib/tls/libc.so.6 #11 0x0fcc6c7c in __gconv_transform_utf8_internal () from /lib/tls/libc.so.6 Previous frame inner to this frame (corrupt stack?) Regards, Roger -- System Information: Debian Release: testing/unstable APT prefers unstable APT policy: (990, 'unstable') Architecture: powerpc (ppc) Shell: /bin/sh linked to /bin/bash Kernel: Linux 2.6.16.17 Locale: LANG=en_GB.UTF8, LC_CTYPE=en_GB.UTF8 (charmap=UTF-8) Versions of packages libstdc++6 depends on: ii gcc-4.1-base 4.1.1-5 The GNU Compiler Collection (base ii libc6 2.3.6-15 GNU C Library: Shared libraries ii libgcc1 1:4.1.1-5 GCC support library libstdc++6 recommends no packages. -- no debconf information
#include <iostream> #include <locale> #include <string> std::wstring to_wide_string (std::string const& str, std::locale locale) { typedef std::codecvt<wchar_t, char, mbstate_t> codecvt_type; codecvt_type const& cvt = std::use_facet<codecvt_type>(locale); mbstate_t state; const char *cbegin = str.data(), *cend = str.data() + str.size(), *cnext; wchar_t *wcnext; wchar_t wcbuf[80]; std::wstring ret; while (1) { std::codecvt_base::result res = cvt.in(state, cbegin, cend, cnext, wcbuf, wcbuf + (sizeof(wcbuf) / sizeof(wcbuf[0])), wcnext); if (res == std::codecvt_base::ok || std::codecvt_base::partial) { std::cout << "1" << std::endl; ret += std::wstring(wcbuf, wcnext); if (cend == cnext) break; } else if (res == std::codecvt_base::noconv) { std::cout << "2" << std::endl; ret += std::wstring(cbegin, cend); break; } else if (res == std::codecvt_base::error) { std::cout << "3" << std::endl; break; } else break; std::cout << "4" << std::endl; cbegin = cnext; } return ret; } int main() { std::locale::global(std::locale("")); std::cout.imbue(std::locale()); std::wcerr.imbue(std::locale()); std::string foo = "fffäß»"; std::wstring wfoo = to_wide_string(foo, std::wcerr.getloc()); std::cout << foo; std::wcerr << wfoo; return 0; }