Hi Jeff: In the meantime, I looked into ways to implement the GMime parser error reporting feature. Instead of using a signal emitted by the GMimeParser object, I think it might be better to add a “issue callback” to GMimeParserOptions, as it is also used by many other functions.
The attached little patch implements a /very/ basic feasibility demonstration: - add the callback to GMimeParserOptions; - call it from the parser if a duplicated “Content-*” header is detected, and drop the duplicated header; - check for duplicated header parameters in decode_param_list() and keep only the first one. Note that for the latter, it is not possible to report the originating object and parser offset, as it is not passed downstream into the function. Do you have an idea how this could be achieved, without adding extra parameters to all the functions? Maybe add an internal reference to the parser object to GMimeParserOptions? The possibility of removing “evil” headers or parameters should probably be configurable through GMimeParserOptions. If it is activated, it can be used to parse and re-write a sanitised message with a well-known behaviour. Running the attached suspicious message data block (packed, as Balsa would attach it as message/rfc822 otherwise…) through the modified test-parser application, all 6 issues are detected and reported. What do you think about this approach? Should I proceed this way? Cheers, Albrecht.
diff --git a/gmime/gmime-internal.h b/gmime/gmime-internal.h
index eff1bc6..32dc73d 100644
--- a/gmime/gmime-internal.h
+++ b/gmime/gmime-internal.h
@@ -50,6 +50,8 @@ G_GNUC_INTERNAL GMimeFormatOptions *_g_mime_format_options_clone (GMimeFormatOpt
/* GMimeParserOptions */
G_GNUC_INTERNAL void g_mime_parser_options_init (void);
G_GNUC_INTERNAL void g_mime_parser_options_shutdown (void);
+G_GNUC_INTERNAL void _g_mime_parser_options_notify (GMimeParserOptions *options, GObject *object, gint64 offset,
+ GMimeError errcode, const gchar *item);
/* GMimeHeader */
//G_GNUC_INTERNAL void _g_mime_header_set_raw_value (GMimeHeader *header, const char *raw_value);
diff --git a/gmime/gmime-param.c b/gmime/gmime-param.c
index 29b44eb..b875322 100644
--- a/gmime/gmime-param.c
+++ b/gmime/gmime-param.c
@@ -1361,24 +1361,28 @@ decode_param_list (GMimeParserOptions *options, const char *in)
g_free (name);
}
} else {
- param = g_mime_param_new ();
- param->name = name;
-
- if (encoded) {
- /* singleton encoded rfc2184 param value */
- param->value = rfc2184_decode (value, &charset, &lang);
- param->charset = charset;
- param->method = method;
- param->lang = lang;
- g_free (value);
+ if (g_mime_param_list_get_parameter(params, name) != NULL) {
+ _g_mime_parser_options_notify (options, NULL, 0U, GMIME_ERR_DUPLICATED_PARAMETER, name);
} else {
- /* normal parameter value */
- param->charset = rfc2047_charset ? g_strdup (rfc2047_charset) : NULL;
- param->method = method;
- param->value = value;
+ param = g_mime_param_new ();
+ param->name = name;
+
+ if (encoded) {
+ /* singleton encoded rfc2184 param value */
+ param->value = rfc2184_decode (value, &charset, &lang);
+ param->charset = charset;
+ param->method = method;
+ param->lang = lang;
+ g_free (value);
+ } else {
+ /* normal parameter value */
+ param->charset = rfc2047_charset ? g_strdup (rfc2047_charset) : NULL;
+ param->method = method;
+ param->value = value;
+ }
+
+ g_mime_param_list_add (params, param);
}
-
- g_mime_param_list_add (params, param);
}
skip_cfws (&inptr);
diff --git a/gmime/gmime-parser-options.c b/gmime/gmime-parser-options.c
index 7eb05e8..025dd44 100644
--- a/gmime/gmime-parser-options.c
+++ b/gmime/gmime-parser-options.c
@@ -48,6 +48,7 @@ struct _GMimeParserOptions {
GMimeRfcComplianceMode rfc2047;
gboolean allow_no_domain;
char **charsets;
+ GMimeParserIssueCB issue_cb;
};
static GMimeParserOptions *default_options = NULL;
@@ -72,6 +73,14 @@ g_mime_parser_options_shutdown (void)
default_options = NULL;
}
+void
+_g_mime_parser_options_notify (GMimeParserOptions *options, GObject *object, guint offset,
+ guint issue, const gchar *item)
+{
+ if ((options != NULL) && (options->issue_cb != NULL)) {
+ options->issue_cb(object, offset, issue, item);
+ }
+}
/**
* g_mime_parser_options_get_default:
@@ -110,6 +119,8 @@ g_mime_parser_options_new (void)
options->charsets[1] = g_strdup ("iso-8859-1");
options->charsets[2] = NULL;
+ options->issue_cb = NULL;
+
return options;
}
@@ -145,6 +156,8 @@ g_mime_parser_options_clone (GMimeParserOptions *options)
clone->charsets[i] = g_strdup (options->charsets[i]);
clone->charsets[i] = NULL;
+ clone->issue_cb = options->issue_cb;
+
return clone;
}
@@ -393,3 +406,18 @@ g_mime_parser_options_set_fallback_charsets (GMimeParserOptions *options, const
options->charsets[i] = g_strdup (charsets[i]);
options->charsets[n] = NULL;
}
+
+
+GMimeParserIssueCB
+g_mime_parser_options_get_issue_cb (GMimeParserOptions *options)
+{
+ return (options != NULL) ? options->issue_cb : default_options->issue_cb;
+}
+
+void
+g_mime_parser_options_set_issue_cb (GMimeParserOptions *options, GMimeParserIssueCB issue_cb)
+{
+ g_return_if_fail (options != NULL);
+
+ options->issue_cb = issue_cb;
+}
diff --git a/gmime/gmime-parser-options.h b/gmime/gmime-parser-options.h
index b5c9af7..e90c765 100644
--- a/gmime/gmime-parser-options.h
+++ b/gmime/gmime-parser-options.h
@@ -41,6 +41,11 @@ typedef enum {
GMIME_RFC_COMPLIANCE_STRICT
} GMimeRfcComplianceMode;
+typedef enum {
+ GMIME_ERR_DUPLICATED_CONTENT_HDR = 1U,
+ GMIME_ERR_DUPLICATED_PARAMETER,
+} GMimeError;
+
/**
* GMimeParserOptions:
*
@@ -48,6 +53,8 @@ typedef enum {
**/
typedef struct _GMimeParserOptions GMimeParserOptions;
+typedef void (*GMimeParserIssueCB) (GObject *object, gint64 offset, GMimeError errcode, const gchar *item);
+
GType g_mime_parser_options_get_type (void);
@@ -73,6 +80,9 @@ void g_mime_parser_options_set_rfc2047_compliance_mode (GMimeParserOptions *opti
const char **g_mime_parser_options_get_fallback_charsets (GMimeParserOptions *options);
void g_mime_parser_options_set_fallback_charsets (GMimeParserOptions *options, const char **charsets);
+GMimeParserIssueCB g_mime_parser_options_get_issue_cb (GMimeParserOptions *options);
+void g_mime_parser_options_set_issue_cb (GMimeParserOptions *options, GMimeParserIssueCB issue_cb);
+
G_END_DECLS
#endif /* __GMIME_PARSER_OPTIONS_H__ */
diff --git a/gmime/gmime-parser.c b/gmime/gmime-parser.c
index 092279c..c6871a8 100644
--- a/gmime/gmime-parser.c
+++ b/gmime/gmime-parser.c
@@ -1671,8 +1671,13 @@ parser_scan_message_part (GMimeParser *parser, GMimeParserOptions *options, GMim
header = priv->headers->pdata[i];
if (g_ascii_strncasecmp (header->name, "Content-", 8) != 0) {
- _g_mime_object_append_header ((GMimeObject *) message, header->name, header->raw_name,
- header->raw_value, header->offset);
+ if (g_mime_object_get_header((GMimeObject *) message, header->name) != NULL) {
+ _g_mime_parser_options_notify (options, G_OBJECT(parser), header->offset,
+ GMIME_ERR_DUPLICATED_CONTENT_HDR, header->name);
+ } else {
+ _g_mime_object_append_header ((GMimeObject *) message, header->name, header->raw_name,
+ header->raw_value, header->offset);
+ }
}
}
@@ -1713,8 +1718,13 @@ parser_construct_leaf_part (GMimeParser *parser, GMimeParserOptions *options, Co
header = priv->headers->pdata[i];
if (!toplevel || !g_ascii_strncasecmp (header->name, "Content-", 8)) {
- _g_mime_object_append_header (object, header->name, header->raw_name,
- header->raw_value, header->offset);
+ if (g_mime_object_get_header(object, header->name) != NULL) {
+ _g_mime_parser_options_notify (options, G_OBJECT(parser), header->offset,
+ GMIME_ERR_DUPLICATED_CONTENT_HDR, header->name);
+ } else {
+ _g_mime_object_append_header (object, header->name, header->raw_name,
+ header->raw_value, header->offset);
+ }
}
}
@@ -1856,8 +1866,13 @@ parser_construct_multipart (GMimeParser *parser, GMimeParserOptions *options, Co
header = priv->headers->pdata[i];
if (!toplevel || !g_ascii_strncasecmp (header->name, "Content-", 8)) {
- _g_mime_object_append_header (object, header->name, header->raw_name,
- header->raw_value, header->offset);
+ if (g_mime_object_get_header(object, header->name) != NULL) {
+ _g_mime_parser_options_notify (options, G_OBJECT(parser), header->offset,
+ GMIME_ERR_DUPLICATED_CONTENT_HDR, header->name);
+ } else {
+ _g_mime_object_append_header (object, header->name, header->raw_name,
+ header->raw_value, header->offset);
+ }
}
}
@@ -1981,6 +1996,7 @@ parser_construct_message (GMimeParser *parser, GMimeParserOptions *options)
message = g_mime_message_new (FALSE);
((GMimeObject *) message)->ensure_newline = FALSE;
+ _g_mime_header_list_set_options(g_mime_object_get_header_list(GMIME_OBJECT(message)), options);
for (i = 0; i < priv->headers->len; i++) {
header = priv->headers->pdata[i];
@@ -1996,8 +2012,13 @@ parser_construct_message (GMimeParser *parser, GMimeParserOptions *options)
}
if (g_ascii_strncasecmp (header->name, "Content-", 8) != 0) {
- _g_mime_object_append_header ((GMimeObject *) message, header->name, header->raw_name,
- header->raw_value, header->offset);
+ if (g_mime_object_get_header((GMimeObject *) message, header->name) != NULL) {
+ _g_mime_parser_options_notify (options, G_OBJECT(parser), header->offset,
+ GMIME_ERR_DUPLICATED_CONTENT_HDR, header->name);
+ } else {
+ _g_mime_object_append_header ((GMimeObject *) message, header->name, header->raw_name,
+ header->raw_value, header->offset);
+ }
}
}
diff --git a/tests/test-parser.c b/tests/test-parser.c
index 94f7161..a0a25f1 100644
--- a/tests/test-parser.c
+++ b/tests/test-parser.c
@@ -154,9 +154,16 @@ print_mime_struct_iter (GMimeMessage *message)
#endif /* PRINT_MIME_STRUCT */
static void
+report_issue(GObject *object, gint64 offset, GMimeError errcode, const gchar *item)
+{
+ g_warning("%p %" G_GINT64_FORMAT " %u %s", object, offset, (unsigned) errcode, item);
+}
+
+static void
test_parser (GMimeStream *stream)
{
GMimeFormatOptions *format = g_mime_format_options_get_default ();
+ GMimeParserOptions *options;
GMimeParser *parser;
GMimeMessage *message;
char *text;
@@ -167,7 +174,9 @@ test_parser (GMimeStream *stream)
g_mime_parser_init_with_stream (parser, stream);
ZenTimerStart (NULL);
- message = g_mime_parser_construct_message (parser, NULL);
+ options = g_mime_parser_options_new();
+ g_mime_parser_options_set_issue_cb(options, report_issue);
+ message = g_mime_parser_construct_message (parser, options);
ZenTimerStop (NULL);
ZenTimerReport (NULL, "gmime::parser_construct_message");
BADMAIL.gz
Description: application/gzip
pgph3cpDhj1LB.pgp
Description: PGP signature
_______________________________________________ balsa-list mailing list [email protected] https://mail.gnome.org/mailman/listinfo/balsa-list
