Hi,

I think I found the root cause of the bug.
The pasted character lies outside the Basic Multilingual Plane, so the data
is not valid UCS-2; it is UTF-16LE containing a surrogate pair.

First, I wrote a patch that adds a debug message when the g_convert() call
fails; see the attachment add_debug_message_of_g_convert.patch.

With that patch applied, it prints:
"ut_iconv.cpp:379 UT_convert() Cannot convert str fffe00d801dc(6) from
UCS-2LE to UTF-8 because Invalid byte sequence in conversion input"

Because the conversion fails, the result string is NULL.
That NULL is then passed to memmove() at ut_bytebuf.cpp:96, which causes
the crash.

I then reordered the encoding preference list in
src/af/xap/xp/xap_EncodingManager.cpp; please see the attachment
reorderencoding.patch.
With that change it works fine: the character appears correctly in AbiWord.

So my suggestion is that we either handle the conversion failure properly
(for example, ignore the paste when NULL is returned), or make "UTF-16LE"
the preferred value rather than UCS-2.

Yours,
Paul

-- 
                                PaulLiu (劉穎駿)
E-mail: Ying-Chun Liu (PaulLiu) <paul...@debian.org>
Index: abiword-2.9.2+svn20120603/src/af/util/xp/ut_iconv.cpp
===================================================================
--- abiword-2.9.2+svn20120603.orig/src/af/util/xp/ut_iconv.cpp	2012-06-04 13:20:42.000000000 +0800
+++ abiword-2.9.2+svn20120603/src/af/util/xp/ut_iconv.cpp	2012-08-13 01:27:24.090959368 +0800
@@ -355,11 +355,37 @@
 		  UT_uint32*	bytes_written_arg)
 {
 	gsize _bytes_read = 0, _bytes_written = 0;
-	char* result = g_convert(str, len, to_codeset, from_codeset, &_bytes_read, &_bytes_written, NULL);
+	GError *convert_error = NULL;
+	char* result = g_convert(str, len, to_codeset, from_codeset, &_bytes_read, &_bytes_written, &convert_error);
 
 	if (bytes_read_arg) *bytes_read_arg = _bytes_read;
 	if (bytes_written_arg) *bytes_written_arg = _bytes_written;
 
+	if (!result && convert_error) {
+		gchar *strhex=NULL;
+		int i;
+		for (i=0; (i==-1)?(str[i] != '\0'):(i<len); i++) {
+			gchar *tmp;
+			tmp = g_strdup_printf("%02x",((int)str[i]) & 0x00ff);
+			if (strhex) {
+				strhex = g_strconcat(strhex, tmp, NULL);
+				g_free(tmp);
+				tmp = NULL;
+			} else {
+				strhex = tmp;
+				tmp = NULL;
+			}
+		}
+		UT_DEBUGMSG(("%s:%d %s() Cannot convert str %s(%d) from %s to %s because %s\n",__FILE__,__LINE__,__func__,strhex,len,from_codeset,to_codeset,convert_error->message));
+		if (strhex) {
+			g_free(strhex);
+			strhex = NULL;
+		}
+	}
+	if (convert_error) {
+		g_error_free(convert_error);
+		convert_error = NULL;
+	}
 	return result;
 }
 
Index: abiword-2.9.2+svn20120603/src/af/xap/xp/xap_EncodingManager.cpp
===================================================================
--- abiword-2.9.2+svn20120603.orig/src/af/xap/xp/xap_EncodingManager.cpp	2012-06-04 13:20:24.000000000 +0800
+++ abiword-2.9.2+svn20120603/src/af/xap/xp/xap_EncodingManager.cpp	2012-08-13 01:34:47.758718953 +0800
@@ -1260,10 +1260,10 @@
 		"UTF-16-BE",		// my guess
 		0 };
 	static const char * (szUCS2LENames[]) = {
+		"UTF-16LE",			// superset
 		"UCS-2LE",			// preferred
 		"UCS-2-LE",			// older libiconv
 		"UNICODELITTLE",	// older glibc
-		"UTF-16LE",			// superset
 		"UTF-16-LE",		// my guess
 		0 };
 

Attachment: signature.asc
Description: OpenPGP digital signature

Reply via email to