Hi, I think I found the root cause of the bug: the character is not representable in UCS-2; it is encoded in UTF-16LE.
I first made a patch that adds an error message to the g_convert() function call (see attachment add_debug_message_of_g_convert.patch). With that patch applied, it prints "ut_iconv.cpp:379 UT_convert() Cannot convert str fffe00d801dc(6) from UCS-2LE to UTF-8 because Invalid byte sequence in conversion input". Because the conversion fails, the result string is NULL. That NULL is then passed to memmove() at ut_bytebuf.cpp:96, which causes the crash. I then reordered the encoding preference order in src/af/xap/xp/xap_EncodingManager.cpp (see attachment reorderencoding.patch). With that change it works fine: the character now appears in abiword. So my suggestion is that we either handle the conversion failure properly (for example, ignore the paste when NULL is returned), or make "UTF-16LE" the preferred value rather than UCS-2. Yours, Paul -- PaulLiu (劉穎駿) E-mail: Ying-Chun Liu (PaulLiu) <paul...@debian.org>
Index: abiword-2.9.2+svn20120603/src/af/util/xp/ut_iconv.cpp =================================================================== --- abiword-2.9.2+svn20120603.orig/src/af/util/xp/ut_iconv.cpp 2012-06-04 13:20:42.000000000 +0800 +++ abiword-2.9.2+svn20120603/src/af/util/xp/ut_iconv.cpp 2012-08-13 01:27:24.090959368 +0800 @@ -355,11 +355,37 @@ UT_uint32* bytes_written_arg) { gsize _bytes_read = 0, _bytes_written = 0; - char* result = g_convert(str, len, to_codeset, from_codeset, &_bytes_read, &_bytes_written, NULL); + GError *convert_error = NULL; + char* result = g_convert(str, len, to_codeset, from_codeset, &_bytes_read, &_bytes_written, &convert_error); if (bytes_read_arg) *bytes_read_arg = _bytes_read; if (bytes_written_arg) *bytes_written_arg = _bytes_written; + if (!result && convert_error) { + gchar *strhex=NULL; + int i; + for (i=0; (i==-1)?(str[i] != '\0'):(i<len); i++) { + gchar *tmp; + tmp = g_strdup_printf("%02x",((int)str[i]) & 0x00ff); + if (strhex) { + strhex = g_strconcat(strhex, tmp, NULL); + g_free(tmp); + tmp = NULL; + } else { + strhex = tmp; + tmp = NULL; + } + } + UT_DEBUGMSG(("%s:%d %s() Cannot convert str %s(%d) from %s to %s because %s\n",__FILE__,__LINE__,__func__,strhex,len,from_codeset,to_codeset,convert_error->message)); + if (strhex) { + g_free(strhex); + strhex = NULL; + } + } + if (convert_error) { + g_error_free(convert_error); + convert_error = NULL; + } return result; }
Index: abiword-2.9.2+svn20120603/src/af/xap/xp/xap_EncodingManager.cpp =================================================================== --- abiword-2.9.2+svn20120603.orig/src/af/xap/xp/xap_EncodingManager.cpp 2012-06-04 13:20:24.000000000 +0800 +++ abiword-2.9.2+svn20120603/src/af/xap/xp/xap_EncodingManager.cpp 2012-08-13 01:34:47.758718953 +0800 @@ -1260,10 +1260,10 @@ "UTF-16-BE", // my guess 0 }; static const char * (szUCS2LENames[]) = { + "UTF-16LE", // superset "UCS-2LE", // preferred "UCS-2-LE", // older libiconv "UNICODELITTLE", // older glibc - "UTF-16LE", // superset "UTF-16-LE", // my guess 0 };
signature.asc
Description: OpenPGP digital signature