I have reduced this to the attached testcase.
Sorry, I made a mistake trying to attach the file in reportbug; here is the
testcase.
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* GMime
* Copyright (C) 2000-2010 Jeffrey Stedfast
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free
* Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <string.h>
#include <limits.h>
#include <ctype.h>
#include <errno.h>
/**
 * GMimeParam:
 * @next: Pointer to the next param.
 * @name: Parameter name.
 * @value: Parameter value.
 *
 * A parameter name/value pair as used for some Content header fields.
 * Params are chained through @next into a singly linked list.
 *
 * NOTE(review): keep @next as the first member. decode_param_list()
 * casts the address of its list-head pointer to a #GMimeParam pointer
 * and appends by storing through ->next, which only works while @next
 * sits at offset 0.
 **/
typedef struct _GMimeParam {
	struct _GMimeParam *next;
	char *name;
	char *value;
} GMimeParam;
#include <glib.h>
/* Per-byte classification flags, indexed by a character converted to
 * unsigned char (see the is_*() macros). Only entry 0 is given a value:
 * 0x0405 (decimal 1029) sets bits 0, 2 and 10, which the flag enum names
 * IS_CTRL, IS_TSPECIAL and IS_ASCII. All other entries are
 * zero-initialized. */
static unsigned short gmime_special_table[256] = { [0] = 0x0405 };
/* Single-bit flags stored in gmime_special_table[]; combine with '|'. */
enum {
	IS_CTRL     = 0x0001,  /* bit 0 */
	IS_LWSP     = 0x0002,  /* bit 1 */
	IS_TSPECIAL = 0x0004,  /* bit 2 */
	IS_SPECIAL  = 0x0008,  /* bit 3 */
	IS_SPACE    = 0x0010,  /* bit 4 */
	IS_DSPECIAL = 0x0020,  /* bit 5 */
	IS_QPSAFE   = 0x0040,  /* bit 6 */
	IS_ESAFE    = 0x0080,  /* bit 7: encoded word safe */
	IS_PSAFE    = 0x0100,  /* bit 8: encode word in phrase safe */
	IS_ATTRCHAR = 0x0200,  /* bit 9: attribute-char from rfc2184 */

	/* ctype replacements */
	IS_ASCII    = 0x0400,  /* bit 10: ascii */
	IS_BLANK    = 0x0800   /* bit 11: space or tab */
};
/* Character-class predicates backed by gmime_special_table[]. The
 * argument is converted to unsigned char before indexing, so any char
 * value (including negative ones) is a valid argument. Every macro
 * evaluates to the int 0 or 1. */

/* "flag is set" tests */
#define is_ctrl(x) (!!(gmime_special_table[(unsigned char)(x)] & IS_CTRL))
#define is_lwsp(x) (!!(gmime_special_table[(unsigned char)(x)] & IS_LWSP))
#define is_tspecial(x) (!!(gmime_special_table[(unsigned char)(x)] & IS_TSPECIAL))
/* generic form: true when any bit of mask t is set for x */
#define is_type(x, t) (!!(gmime_special_table[(unsigned char)(x)] & (t)))

/* "none of these flags is set" tests */
#define is_ttoken(x) (!(gmime_special_table[(unsigned char)(x)] & (IS_TSPECIAL|IS_LWSP|IS_CTRL)))
#define is_atom(x) (!(gmime_special_table[(unsigned char)(x)] & (IS_SPECIAL|IS_SPACE|IS_CTRL)))
#define is_dtext(x) (!(gmime_special_table[(unsigned char)(x)] & IS_DSPECIAL))
#define is_fieldname(x) (!(gmime_special_table[(unsigned char)(x)] & (IS_CTRL|IS_SPACE)))

/* "flag is set" tests, continued; note is_especial() checks IS_ESAFE */
#define is_qpsafe(x) (!!(gmime_special_table[(unsigned char)(x)] & IS_QPSAFE))
#define is_especial(x) (!!(gmime_special_table[(unsigned char)(x)] & IS_ESAFE))
#define is_psafe(x) (!!(gmime_special_table[(unsigned char)(x)] & IS_PSAFE))
#define is_attrchar(x) (!!(gmime_special_table[(unsigned char)(x)] & IS_ATTRCHAR))

/* ctype replacements */
#define is_ascii(x) (!!(gmime_special_table[(unsigned char)(x)] & IS_ASCII))
#define is_blank(x) (!!(gmime_special_table[(unsigned char)(x)] & IS_BLANK))
#define CHARS_LWSP " \t\n\r" /* linear whitespace chars */
#define CHARS_TSPECIAL "()<>@,;:\\\"/[]?="
#define CHARS_SPECIAL "()<>@,;:\\\".[]"
#define CHARS_CSPECIAL "()\\\r" /* not in comments */
#define CHARS_DSPECIAL "[]\\\r \t" /* not in domains */
#define CHARS_ESPECIAL "()<>@,;:\"/[]?.=_" /* encoded word specials (rfc2047 5.1) */
#define CHARS_PSPECIAL "!*+-/=_" /* encoded phrase specials (rfc2047 5.3) */
#define CHARS_ATTRCHAR "*'% " /* attribute-char from rfc2184 */
#define GMIME_FOLD_LEN 76
#define w(x)
#define d(x)
/*
 * decode_value:
 * @in: address of the read cursor; passed through unchanged to the
 *      selected decoder
 *
 * Chooses a decoder from the first character at the cursor: a double
 * quote selects decode_quoted_string(), anything else selects
 * decode_token(). Token and non-token characters take the same path.
 *
 * Returns: whatever the selected decoder returns.
 */
static char *
decode_value (const char **in)
{
	if (**in == '"')
		return decode_quoted_string (in);

	return decode_token (in);
}
static char *
decode_param_token (const char **in)
{
const char *inptr = *in;
while (is_ttoken (*inptr) && *inptr != '*')
inptr++;
}
/*
 * decode_rfc2184_param:
 * @in: address of the read cursor; read but not advanced
 * @paramp: out: parameter name; always set to %NULL here
 * @part: out: part index; always set to -1 ("not a multipart
 *        parameter", the value decode_param_list() tests for)
 * @encoded: out: whether the value is encoded; always set to %FALSE
 *
 * Reduced stub. The original body wrote none of its out-parameters and
 * had no return statement, although decode_param() and
 * decode_param_list() read all of them. Every output now gets a
 * well-defined "nothing recognised" value. All out-pointers must be
 * non-NULL.
 *
 * Returns: always %FALSE (no rfc2184 parameter form recognised).
 */
static gboolean
decode_rfc2184_param (const char **in, char **paramp, int *part, gboolean *encoded)
{
	const char *inptr = *in;

	*paramp = NULL;
	*part = -1;
	*encoded = FALSE;

	/* Result intentionally unused: the token scanner in this file does
	 * not hand back a name. */
	decode_param_token (&inptr);

	return FALSE;
}
/*
 * decode_param:
 * @in: address of the read cursor; advanced only on success
 * @paramp: out: parameter name (caller frees); written only on success
 * @valuep: out: parameter value (caller frees); written only on success
 * @id: out: forwarded to decode_rfc2184_param()
 * @encoded: out: forwarded to decode_rfc2184_param()
 *
 * Decodes one "name=value" parameter. For values that are not in
 * rfc2184 form, two repairs are attempted: a value containing "=?" is
 * run through g_mime_utils_header_decode_text(), and a value that is
 * not valid UTF-8 is run through g_mime_iconv_locale_to_utf8(). A
 * failed repair keeps the value as it was.
 *
 * Returns: %TRUE when both a name and a value were obtained; otherwise
 * %FALSE, with both strings freed and the outputs left untouched.
 */
static gboolean
decode_param (const char **in, char **paramp, char **valuep, int *id, gboolean *encoded)
{
	gboolean is_rfc2184 = FALSE;
	const char *inptr = *in;
	/* Start from NULL so the test and g_free() below are well defined
	 * even if the callee never stores a name. */
	char *param = NULL, *value = NULL;
	char *val;

	is_rfc2184 = decode_rfc2184_param (&inptr, &param, id, encoded);

	if (*inptr == '=') {
		inptr++;
		value = decode_value (&inptr);

		if (!is_rfc2184 && value) {
			if (strstr (value, "=?") != NULL) {
				/* We (may) have a broken param value that is rfc2047
				 * encoded. Since both Outlook and Netscape/Mozilla do
				 * this, we should handle this case.
				 */
				if ((val = g_mime_utils_header_decode_text (value))) {
					g_free (value);
					value = val;
				}
			}

			if (!g_utf8_validate (value, -1, NULL)) {
				/* A (broken) mailer has sent us an unencoded 8bit value.
				 * Attempt to save it by assuming it's in the user's
				 * locale and converting to UTF-8 */
				if ((val = g_mime_iconv_locale_to_utf8 (value))) {
					g_free (value);
					value = val;
				} else {
					d(g_warning ("Failed to convert %s param value (\"%s\") to UTF-8: %s",
						     param, value, g_strerror (errno)));
				}
			}
		}
	}

	if (param && value) {
		*paramp = param;
		*valuep = value;
		*in = inptr;
		return TRUE;
	} else {
		g_free (param);
		g_free (value);
		return FALSE;
	}
}
/* One piece of a parameter value that was split across several numbered
 * parts. rfc2184_sort_cb() orders these by id. */
struct _rfc2184_part {
/* text of this piece; ownership is not visible in this file -- TODO confirm */
char *value;
/* part number used as the sort key */
int id;
};
/* Bookkeeping for one multipart rfc2184 parameter while a parameter list
 * is being decoded. decode_param_list() chains these through next. */
struct _rfc2184_param {
/* next entry in the chain; must stay the first member because
 * decode_param_list() casts the address of its list head to this type */
struct _rfc2184_param *next;
/* presumably the charset of an encoded value -- TODO confirm; never set in visible code */
const char *charset;
/* NOTE(review): the link to the owning GMimeParam is commented out, yet
 * decode_param_list() still has a (commented-out) read of it. */
// GMimeParam *param;
/* presumably an array of struct _rfc2184_part pointers, matching what
 * rfc2184_sort_cb() dereferences -- TODO confirm */
GPtrArray *parts;
/* presumably the language tag of an encoded value -- TODO confirm */
char *lang;
};
/*
 * rfc2184_sort_cb:
 * @v0: address of a pointer to the first struct _rfc2184_part
 * @v1: address of a pointer to the second struct _rfc2184_part
 *
 * qsort-style comparator for an array of part pointers, ordering by
 * ascending id. It is not called anywhere in the visible source.
 *
 * The original returned p0->id - p1->id, which overflows signed int
 * (undefined behaviour) when the ids are far apart. The comparison form
 * below cannot overflow and gives the same ordering.
 *
 * Returns: -1, 0 or 1 as the first id is less than, equal to or greater
 * than the second.
 */
static int
rfc2184_sort_cb (const void *v0, const void *v1)
{
	const struct _rfc2184_part *p0 = *((const struct _rfc2184_part * const *) v0);
	const struct _rfc2184_part *p1 = *((const struct _rfc2184_part * const *) v1);

	return (p0->id > p1->id) - (p0->id < p1->id);
}
/* HEXVAL: numeric value (0-15) of the hexadecimal digit c, upper or
 * lower case. The argument is cast to unsigned char before it reaches
 * isdigit()/tolower(): passing a negative plain char to <ctype.h>
 * functions is undefined behaviour. c is evaluated more than once, so do
 * not pass an expression with side effects. The result is meaningless
 * when c is not a hex digit. */
#define HEXVAL(c) (isdigit ((unsigned char) (c)) ? (c) - '0' : tolower ((unsigned char) (c)) - 'a' + 10)
static GMimeParam *
decode_param_list (const char *in)
{
struct _rfc2184_param *rfc2184, *list, *t;
GMimeParam *param, *params, *tail;
struct _rfc2184_part *part;
GHashTable *rfc2184_hash;
const char *inptr = in;
char *name, *value;
gboolean encoded;
GString *gvalue;
guint i;
int id;
params = NULL;
tail = (GMimeParam *) ¶ms;
list = NULL;
t = (struct _rfc2184_param *) &list;
//rfc2184_hash = g_hash_table_new (g_mime_strcase_hash, g_mime_strcase_equal);
decode_lwsp (&inptr);
do {
/* invalid format? */
if (!decode_param (&inptr, &name, &value, &id, &encoded)) {
decode_lwsp (&inptr);
if (*inptr == ';')
continue;
break;
}
if (id != -1) {
/* we have a multipart rfc2184 param */
if (!(rfc2184 = g_hash_table_lookup (rfc2184_hash, name))) {
rfc2184 = rfc2184_param_new (name, value, id, encoded);
//param = rfc2184->param;
t->next = rfc2184;
t = rfc2184;
g_hash_table_insert (rfc2184_hash, param->name, rfc2184);
tail->next = param;
tail = param;
} else {
rfc2184_param_add_part (rfc2184, value, id, encoded);
g_free (name);
}
} else {
param = g_new (GMimeParam, 1);
param->next = NULL;
param->name = name;
if (encoded) {
/* singleton encoded rfc2184 param value */
param->value = rfc2184_decode (value);
g_free (value);
} else {
/* normal parameter value */
param->value = value;
}
tail->next = param;
tail = param;
}
decode_lwsp (&inptr);
} while (*inptr++ == ';');
}
/**
 * g_mime_param_new_from_string:
 * @str: input string
 *
 * Builds a parameter list from @str by passing it to
 * decode_param_list().
 *
 * Returns: the #GMimeParam list decoded from @str, or %NULL if @str is
 * %NULL.
 **/
GMimeParam *
g_mime_param_new_from_string (const char *str)
{
	GMimeParam *params;

	g_return_val_if_fail (str != NULL, NULL);

	params = decode_param_list (str);

	return params;
}