Hi Jeff:

In the meantime, I looked into ways to implement the GMime parser error 
reporting feature.  Instead of using a signal emitted by the GMimeParser 
object, I think it might be better to add a “issue callback” to 
GMimeParserOptions, as it is also used by many other functions.

The attached little patch implements a /very/ basic feasibility demonstration:
- add the callback to GMimeParserOptions;
- call it from the parser if a duplicated “Content-*” header is detected, and 
drop the duplicated header;
- check for duplicated header parameters in decode_param_list() and keep only 
the first one.

Note that for the latter, it is not possible to report the originating object 
and parser offset, as it is not passed downstream into the function.  Do you 
have an idea how this could be achieved, without adding extra parameters to all 
the functions?  Maybe add an internal reference to the parser object to 
GMimeParserOptions?

The possibility of removing “evil” headers or parameters should probably be 
configurable through GMimeParserOptions.  If it is activated, it can be used to 
parse and re-write a sanitised message with a well-known behaviour.

Running the attached suspicious message data block (packed, as Balsa would 
attach it as message/rfc822 otherwise…) through the modified test-parser 
application, all 6 issues are detected and reported.

What do you think about this approach?  Should I proceed this way?

Cheers, Albrecht.
diff --git a/gmime/gmime-internal.h b/gmime/gmime-internal.h
index eff1bc6..32dc73d 100644
--- a/gmime/gmime-internal.h
+++ b/gmime/gmime-internal.h
@@ -50,6 +50,8 @@ G_GNUC_INTERNAL GMimeFormatOptions *_g_mime_format_options_clone (GMimeFormatOpt
 /* GMimeParserOptions */
 G_GNUC_INTERNAL void g_mime_parser_options_init (void);
 G_GNUC_INTERNAL void g_mime_parser_options_shutdown (void);
+G_GNUC_INTERNAL void _g_mime_parser_options_notify (GMimeParserOptions *options, GObject *object, gint64 offset,
+						    GMimeError errcode, const gchar *item);
 
 /* GMimeHeader */
 //G_GNUC_INTERNAL void _g_mime_header_set_raw_value (GMimeHeader *header, const char *raw_value);
diff --git a/gmime/gmime-param.c b/gmime/gmime-param.c
index 29b44eb..b875322 100644
--- a/gmime/gmime-param.c
+++ b/gmime/gmime-param.c
@@ -1361,24 +1361,28 @@ decode_param_list (GMimeParserOptions *options, const char *in)
 				g_free (name);
 			}
 		} else {
-			param = g_mime_param_new ();
-			param->name = name;
-			
-			if (encoded) {
-				/* singleton encoded rfc2184 param value */
-				param->value = rfc2184_decode (value, &charset, &lang);
-				param->charset = charset;
-				param->method = method;
-				param->lang = lang;
-				g_free (value);
+			if (g_mime_param_list_get_parameter(params, name) != NULL) {
+				_g_mime_parser_options_notify (options, NULL, 0U, GMIME_ERR_DUPLICATED_PARAMETER, name);
 			} else {
-				/* normal parameter value */
-				param->charset = rfc2047_charset ? g_strdup (rfc2047_charset) : NULL;
-				param->method = method;
-				param->value = value;
+				param = g_mime_param_new ();
+				param->name = name;
+
+				if (encoded) {
+					/* singleton encoded rfc2184 param value */
+					param->value = rfc2184_decode (value, &charset, &lang);
+					param->charset = charset;
+					param->method = method;
+					param->lang = lang;
+					g_free (value);
+				} else {
+					/* normal parameter value */
+					param->charset = rfc2047_charset ? g_strdup (rfc2047_charset) : NULL;
+					param->method = method;
+					param->value = value;
+				}
+
+				g_mime_param_list_add (params, param);
 			}
-			
-			g_mime_param_list_add (params, param);
 		}
 		
 		skip_cfws (&inptr);
diff --git a/gmime/gmime-parser-options.c b/gmime/gmime-parser-options.c
index 7eb05e8..025dd44 100644
--- a/gmime/gmime-parser-options.c
+++ b/gmime/gmime-parser-options.c
@@ -48,6 +48,7 @@ struct _GMimeParserOptions {
 	GMimeRfcComplianceMode rfc2047;
 	gboolean allow_no_domain;
 	char **charsets;
+	GMimeParserIssueCB issue_cb;
 };
 
 static GMimeParserOptions *default_options = NULL;
@@ -72,6 +73,14 @@ g_mime_parser_options_shutdown (void)
 	default_options = NULL;
 }
 
+void
+_g_mime_parser_options_notify (GMimeParserOptions *options, GObject *object, guint offset,
+			       guint issue, const gchar *item)
+{
+	if ((options != NULL) && (options->issue_cb != NULL)) {
+		options->issue_cb(object, offset, issue, item);
+	}
+}
 
 /**
  * g_mime_parser_options_get_default:
@@ -110,6 +119,8 @@ g_mime_parser_options_new (void)
 	options->charsets[1] = g_strdup ("iso-8859-1");
 	options->charsets[2] = NULL;
 	
+	options->issue_cb = NULL;
+
 	return options;
 }
 
@@ -145,6 +156,8 @@ g_mime_parser_options_clone (GMimeParserOptions *options)
 		clone->charsets[i] = g_strdup (options->charsets[i]);
 	clone->charsets[i] = NULL;
 	
+	clone->issue_cb = options->issue_cb;
+
 	return clone;
 }
 
@@ -393,3 +406,18 @@ g_mime_parser_options_set_fallback_charsets (GMimeParserOptions *options, const
 		options->charsets[i] = g_strdup (charsets[i]);
 	options->charsets[n] = NULL;
 }
+
+
+GMimeParserIssueCB
+g_mime_parser_options_get_issue_cb (GMimeParserOptions *options)
+{
+	return (options != NULL) ? options->issue_cb : default_options->issue_cb;
+}
+
+void
+g_mime_parser_options_set_issue_cb (GMimeParserOptions *options, GMimeParserIssueCB issue_cb)
+{
+	g_return_if_fail (options != NULL);
+
+	options->issue_cb = issue_cb;
+}
diff --git a/gmime/gmime-parser-options.h b/gmime/gmime-parser-options.h
index b5c9af7..e90c765 100644
--- a/gmime/gmime-parser-options.h
+++ b/gmime/gmime-parser-options.h
@@ -41,6 +41,11 @@ typedef enum {
 	GMIME_RFC_COMPLIANCE_STRICT
 } GMimeRfcComplianceMode;
 
+typedef enum {
+	GMIME_ERR_DUPLICATED_CONTENT_HDR = 1U,
+	GMIME_ERR_DUPLICATED_PARAMETER,
+} GMimeError;
+
 /**
  * GMimeParserOptions:
  *
@@ -48,6 +53,8 @@ typedef enum {
  **/
 typedef struct _GMimeParserOptions GMimeParserOptions;
 
+typedef void (*GMimeParserIssueCB) (GObject *object, gint64 offset, GMimeError errcode, const gchar *item);
+
 
 GType g_mime_parser_options_get_type (void);
 
@@ -73,6 +80,9 @@ void g_mime_parser_options_set_rfc2047_compliance_mode (GMimeParserOptions *opti
 const char **g_mime_parser_options_get_fallback_charsets (GMimeParserOptions *options);
 void g_mime_parser_options_set_fallback_charsets (GMimeParserOptions *options, const char **charsets);
 
+GMimeParserIssueCB g_mime_parser_options_get_issue_cb (GMimeParserOptions *options);
+void g_mime_parser_options_set_issue_cb (GMimeParserOptions *options, GMimeParserIssueCB issue_cb);
+
 G_END_DECLS
 
 #endif /* __GMIME_PARSER_OPTIONS_H__ */
diff --git a/gmime/gmime-parser.c b/gmime/gmime-parser.c
index 092279c..c6871a8 100644
--- a/gmime/gmime-parser.c
+++ b/gmime/gmime-parser.c
@@ -1671,8 +1671,13 @@ parser_scan_message_part (GMimeParser *parser, GMimeParserOptions *options, GMim
 		header = priv->headers->pdata[i];
 		
 		if (g_ascii_strncasecmp (header->name, "Content-", 8) != 0) {
-			_g_mime_object_append_header ((GMimeObject *) message, header->name, header->raw_name,
-						      header->raw_value, header->offset);
+			if (g_mime_object_get_header((GMimeObject *) message, header->name) != NULL) {
+				_g_mime_parser_options_notify (options, G_OBJECT(parser), header->offset,
+							       GMIME_ERR_DUPLICATED_CONTENT_HDR, header->name);
+			} else {
+				_g_mime_object_append_header ((GMimeObject *) message, header->name, header->raw_name,
+							      header->raw_value, header->offset);
+			}
 		}
 	}
 	
@@ -1713,8 +1718,13 @@ parser_construct_leaf_part (GMimeParser *parser, GMimeParserOptions *options, Co
 		header = priv->headers->pdata[i];
 		
 		if (!toplevel || !g_ascii_strncasecmp (header->name, "Content-", 8)) {
-			_g_mime_object_append_header (object, header->name, header->raw_name,
-						      header->raw_value, header->offset);
+			if (g_mime_object_get_header(object, header->name) != NULL) {
+				_g_mime_parser_options_notify (options, G_OBJECT(parser), header->offset,
+							       GMIME_ERR_DUPLICATED_CONTENT_HDR, header->name);
+			} else {
+				_g_mime_object_append_header (object, header->name, header->raw_name,
+							      header->raw_value, header->offset);
+			}
 		}
 	}
 	
@@ -1856,8 +1866,13 @@ parser_construct_multipart (GMimeParser *parser, GMimeParserOptions *options, Co
 		header = priv->headers->pdata[i];
 		
 		if (!toplevel || !g_ascii_strncasecmp (header->name, "Content-", 8)) {
-			_g_mime_object_append_header (object, header->name, header->raw_name,
-						      header->raw_value, header->offset);
+			if (g_mime_object_get_header(object, header->name) != NULL) {
+				_g_mime_parser_options_notify (options, G_OBJECT(parser), header->offset,
+							       GMIME_ERR_DUPLICATED_CONTENT_HDR, header->name);
+			} else {
+				_g_mime_object_append_header (object, header->name, header->raw_name,
+						      	      header->raw_value, header->offset);
+			}
 		}
 	}
 	
@@ -1981,6 +1996,7 @@ parser_construct_message (GMimeParser *parser, GMimeParserOptions *options)
 	
 	message = g_mime_message_new (FALSE);
 	((GMimeObject *) message)->ensure_newline = FALSE;
+	_g_mime_header_list_set_options(g_mime_object_get_header_list(GMIME_OBJECT(message)), options);
 	
 	for (i = 0; i < priv->headers->len; i++) {
 		header = priv->headers->pdata[i];
@@ -1996,8 +2012,13 @@ parser_construct_message (GMimeParser *parser, GMimeParserOptions *options)
 		}
 		
 		if (g_ascii_strncasecmp (header->name, "Content-", 8) != 0) {
-			_g_mime_object_append_header ((GMimeObject *) message, header->name, header->raw_name,
-						      header->raw_value, header->offset);
+			if (g_mime_object_get_header((GMimeObject *) message, header->name) != NULL) {
+				_g_mime_parser_options_notify (options, G_OBJECT(parser), header->offset,
+							       GMIME_ERR_DUPLICATED_CONTENT_HDR, header->name);
+			} else {
+				_g_mime_object_append_header ((GMimeObject *) message, header->name, header->raw_name,
+							      header->raw_value, header->offset);
+			}
 		}
 	}
 	
diff --git a/tests/test-parser.c b/tests/test-parser.c
index 94f7161..a0a25f1 100644
--- a/tests/test-parser.c
+++ b/tests/test-parser.c
@@ -154,9 +154,16 @@ print_mime_struct_iter (GMimeMessage *message)
 #endif /* PRINT_MIME_STRUCT */
 
 static void
+report_issue(GObject *object, gint64 offset, GMimeError errcode, const gchar *item)
+{
+	g_warning("%p %" G_GINT64_FORMAT " %u %s", object, offset, (unsigned) errcode, item);
+}
+
+static void
 test_parser (GMimeStream *stream)
 {
 	GMimeFormatOptions *format = g_mime_format_options_get_default ();
+	GMimeParserOptions *options;
 	GMimeParser *parser;
 	GMimeMessage *message;
 	char *text;
@@ -167,7 +174,9 @@ test_parser (GMimeStream *stream)
 	g_mime_parser_init_with_stream (parser, stream);
 	
 	ZenTimerStart (NULL);
-	message = g_mime_parser_construct_message (parser, NULL);
+	options = g_mime_parser_options_new();
+	g_mime_parser_options_set_issue_cb(options, report_issue);
+	message = g_mime_parser_construct_message (parser, options);
 	ZenTimerStop (NULL);
 	ZenTimerReport (NULL, "gmime::parser_construct_message");
 	

Attachment: BADMAIL.gz
Description: application/gzip

Attachment: pgph3cpDhj1LB.pgp
Description: PGP signature

_______________________________________________
balsa-list mailing list
[email protected]
https://mail.gnome.org/mailman/listinfo/balsa-list

Reply via email to