Hi all,

Here's the updated patch. It first runs the message content through the
GMimeFilterWindows that Jeff mentioned, and then passes the charset that
filter detects to GMimeFilterCharset when actually rendering the message.

Jeff, is this how to use the filter correctly?

Cheers,
Sebastian


diff -ura notmuch-0.27/notmuch-show.c notmuch-0.27-patched/notmuch-show.c
--- notmuch-0.27/notmuch-show.c	2018-06-13 03:42:34.000000000 +0200
+++ notmuch-0.27-patched/notmuch-show.c	2018-07-28 10:25:25.358502880 +0200
@@ -271,7 +271,10 @@
 {
     GMimeContentType *content_type = g_mime_object_get_content_type (GMIME_OBJECT (part));
     GMimeStream *stream_filter = NULL;
+    GMimeStream *null_stream = NULL;
+    GMimeStream *null_stream_filter = NULL;
     GMimeFilter *crlf_filter = NULL;
+    GMimeFilter *windows_filter = NULL;
     GMimeDataWrapper *wrapper;
     const char *charset;
 
@@ -282,13 +285,27 @@
     if (stream_out == NULL)
 	return;
 
+    charset = g_mime_object_get_content_type_parameter (part, "charset");
+    wrapper = g_mime_part_get_content_object (GMIME_PART (part));
+    if (wrapper && charset) {
+	/* Check for mislabeled Windows encoding */
+	null_stream = g_mime_stream_null_new ();
+	null_stream_filter = g_mime_stream_filter_new (null_stream);
+	windows_filter = g_mime_filter_windows_new (charset);
+	g_mime_stream_filter_add(GMIME_STREAM_FILTER (null_stream_filter),
+				 windows_filter);
+	g_mime_data_wrapper_write_to_stream (wrapper, null_stream_filter);
+	charset = g_mime_filter_windows_real_charset(
+	    (GMimeFilterWindows *) windows_filter);
+	g_object_unref (windows_filter);
+    }
+
     stream_filter = g_mime_stream_filter_new (stream_out);
     crlf_filter = g_mime_filter_crlf_new (false, false);
     g_mime_stream_filter_add(GMIME_STREAM_FILTER (stream_filter),
 			     crlf_filter);
     g_object_unref (crlf_filter);
 
-    charset = g_mime_object_get_content_type_parameter (part, "charset");
     if (charset) {
 	GMimeFilter *charset_filter;
 	charset_filter = g_mime_filter_charset_new (charset, "UTF-8");
@@ -313,9 +330,12 @@
 	}
     }
 
-    wrapper = g_mime_part_get_content_object (GMIME_PART (part));
     if (wrapper && stream_filter)
 	g_mime_data_wrapper_write_to_stream (wrapper, stream_filter);
+    if (null_stream_filter)
+	g_object_unref (null_stream_filter);
+    if (null_stream)
+	g_object_unref (null_stream);
     if (stream_filter)
 	g_object_unref(stream_filter);
 }


Sebastian Poeplau <sebastian.poep...@eurecom.fr> writes:

> Hi Jeff,
>
>> GMime actually comes with a stream filter (GMimeFilterWindows) which can 
>> auto-detect this situation.
>>
>> In this particular case, you'd instantiate the GMimeFilterWindows like this:
>>
>> filter = g_mime_filter_windows_new ("iso-8859-1");
>>
>> "iso-8859-1" being the charset that the content claims to be in.
>>
>> Then you'd pipe the raw (decoded but not converted to utf-8) content through
>> the filter and afterward call g_mime_filter_windows_real_charset (filter)
>> which would return, in this user's case, "windows-1252".
>
> Nice, this is exactly what I was looking for! Somehow I missed it when
> checking GMime. I'll adapt my local fix and post the results here.
>
> Thanks,
> Sebastian
_______________________________________________
notmuch mailing list
notmuch@notmuchmail.org
https://notmuchmail.org/mailman/listinfo/notmuch

Reply via email to