Re: [Evolution-hackers] [patch] fixed incorrect rfc2047 decode for CJK header

jacky Sun, 23 Dec 2007 20:52:50 -0800

--- Philip Van Hoof <[EMAIL PROTECTED]> wrote:

> Hey Jacky,
> 
> This is a port of your patch to Tinymail's
> camel-lite
>


Thank you.


> On Sun, 2007-12-23 at 23:09 +0800, jacky wrote:
> > Hi, all.
> > 
> > The rfc2047 decoder in libcamel cannot decode some CJK headers
> > correctly. Although some of them do not conform to the RFC, I need
> > to decode them correctly, and I think that if Evolution can display
> > these emails correctly, more people will like it.
> > 
> > So I wrote a new rfc2047 decoder, and it is in the patch. With the
> > patch, libcamel can decode CJK headers correctly and Evolution can
> > display CJK headers correctly now. I have tested it on my mailbox,
> > which has 2000 emails that were sent by evolution, thunderbird,
> > outlook, outlook express, foxmail, open webmail, yahoo, gmail,
> > lotus notes, etc. Without this patch, almost 20% of these emails
> > can't be decoded and displayed correctly; with this patch, 99% of
> > these emails can be decoded and displayed correctly.
> > 
> > I also found that attachments with CJK names can't be recognised
> > and displayed by outlook / outlook express / foxmail. This is
> > because these email clients do not support RFC2184. Evolution
> > always uses the RFC2184 encoding method to encode attachment names,
> > so an email with a CJK-named attachment can't be displayed in
> > outlook / outlook express / foxmail. In thunderbird, you can set
> > the option "mail.strictly_mime.parm_folding" to 0 or 1 to use the
> > RFC2047 encoding method to encode attachment names. Can we add a
> > similar option?
> > 
> > Best regards.
> > 
> > 
> >      
>
___________________________________________________________
> 
> > 雅虎邮箱传递新年祝福，个性贺卡送亲朋！ 
> >
>
http://cn.mail.yahoo.com/gc/index.html?entry=5&souce=mail_mailletter_tagline
> > _______________________________________________
> Evolution-hackers mailing list
> Evolution-hackers@gnome.org
>
http://mail.gnome.org/mailman/listinfo/evolution-hackers
> -- 
> Philip Van Hoof, freelance software developer
> home: me at pvanhoof dot be 
> gnome: pvanhoof at gnome dot org 
> http://pvanhoof.be/blog
> http://codeminded.be
> 
> 
> 
> > Index:
>
libtinymail-camel/camel-lite/camel/camel-mime-utils.c
>
===================================================================
> ---
>
libtinymail-camel/camel-lite/camel/camel-mime-utils.c
> (revision 3190)
> +++
>
libtinymail-camel/camel-lite/camel/camel-mime-utils.c
> (working copy)
> @@ -821,125 +821,207 @@
>       *in = inptr;
>  }
>  
> +static void
> +print_hex (unsigned char *data, size_t len)
> +{
> +     size_t i, x;
> +     unsigned char *p = data;
> +     char high, low;
> +
> +     x = 0;
> +     printf ("%04u    ", x);
> +     for (i = 0; i < len; i++) {
> +             high = *p >> 4;
> +             high = (high<10) ? high + '0' : high + 'a' - 10;
> +
> +             low = *p & 0x0f;
> +             low = (low<10) ? low + '0' : low + 'a' - 10;
> +
> +             printf ("0x%c%c  ", high, low);
> +
> +             p++;
> +             x++;
> +             if (i % 8 == 7) {
> +                     printf ("\n%04u    ", x);
> +             }
> +     }
> +     printf ("\n");
> +}
> +
> +static size_t
> +conv_to_utf8 (const char *encname, char *in, size_t
> inlen, char *out, size_t outlen)
> +{
> +     char *charset, *inbuf, *outbuf;
> +     iconv_t ic;
> +     size_t inbuf_len, outbuf_len, ret;
> +
> +     charset = (char *) e_iconv_charset_name (encname);
> +
> +     ic = e_iconv_open ("UTF-8", charset);
> +     if (ic == (iconv_t) -1) {
> +             printf ("e_iconv_open() error\n");
> +             return (size_t)-1;
> +     }
> +
> +     inbuf = in;
> +     inbuf_len = inlen;
> +
> +     outbuf = out;
> +     outbuf_len = outlen;
> +
> +     ret = e_iconv (ic, (const char **) &inbuf,
> &inbuf_len, &outbuf, &outbuf_len);
> +     if (ret == (size_t)-1) {
> +             printf ("e_iconv() error! source charset is %s,
> target charset is %s\n", charset, "UTF-8");
> +             printf ("converted %u bytes, but last %u bytes
> can't convert!!\n", inlen - inbuf_len, inbuf_len);
> +             printf ("source data:\n");
> +             print_hex (in, inlen);
> +
> +             *outbuf = '\0';
> +             printf ("target string is \"%s\"\n", out);
> +
> +             return (size_t)-1;
> +     }
> +
> +     ret = outlen - outbuf_len;
> +     out[ret] = '\0';
> +
> +     e_iconv_close (ic);
> +
> +     return ret;
> +}
> +
>  /* decode rfc 2047 encoded string segment */
> +#define DECWORD_LEN 1024
> +#define UTF8_DECWORD_LEN 2048
> +
>  static char *
>  rfc2047_decode_word(const char *in, size_t len)
>  {
> -     const char *inptr = in+2;
> -     const char *inend = in+len-2;
> -     const char *inbuf;
> -     const char *charset;
> -     char *encname, *p;
> -     int tmplen;
> -     size_t ret;
> -     char *decword = NULL;
> -     char *decoded = NULL;
> -     char *outbase = NULL;
> -     char *outbuf;
> -     size_t inlen, outlen;
> -     gboolean retried = FALSE;
> -     iconv_t ic;
> -     int idx = 0;
> +     char prev_charset[32], curr_charset[32];
> +     char encode;
> +     char *start, *inptr, *inend;
> +     char decword[DECWORD_LEN],
> utf8_decword[UTF8_DECWORD_LEN];
> +     char *decword_ptr, *utf8_decword_ptr;
> +     size_t inlen, outlen, ret;
>  
>       d(printf("rfc2047: decoding '%.*s'\n", len, in));
>  
> +     prev_charset[0] = curr_charset[0] = '\0';
> +
> +     decword_ptr = decword;
> +     utf8_decword_ptr = utf8_decword;
> +
>       /* quick check to see if this could possibly be a
> real encoded word */
> -
> -     if (len < 8 || !(in[0] == '=' && in[1] == '?')) {
> +     if (len < 8
> +         || !(in[0] == '=' && in[1] == '?'
> +              && in[len-1] == '=' && in[len-2] == '?')) {
>               d(printf("invalid\n"));
>               return NULL;
>       }
>  
> -     /* skip past the charset to the encoding type */
> -     inptr = memchr (inptr, '?', inend-inptr);
> -     if (inptr != NULL && inptr < inend + 2 && inptr[2]
> == '?') {
> -             d(printf("found ?, encoding is '%c'\n",
> inptr[0]));
> -             inptr++;
> -             tmplen = inend-inptr-2;
> -             decword = g_alloca (tmplen); /* this will always
> be more-than-enough room */
> -             switch(toupper(inptr[0])) {
> -             case 'Q':
> -                     inlen = quoted_decode((const unsigned char *)
> inptr+2, tmplen, (unsigned char *) decword);
> -                     break;
> -             case 'B': {
> -                     int state = 0;
> -                     unsigned int save = 0;
> +     inptr = (char *) in;
> +     inend = (char *) (in + len);
> +     outlen = sizeof(utf8_decword);
>  
> -                     inlen = camel_base64_decode_step((unsigned char
> *) inptr+2, tmplen, (unsigned char *) decword,
> &state, &save);
> -                     /* if state != 0 then error? */
> -                     break;
> -             }
> -             default:
> -                     /* uhhh, unknown encoding type - probably an
> invalid encoded word string */
> +     while (inptr < inend) {
> +             /* begin */
> +             inptr = memchr (inptr, '?', inend-inptr);
> +             if (!inptr || *(inptr-1) != '=') {
>                       return NULL;
>               }
> -             d(printf("The encoded length = %d\n", inlen));
> -             if (inlen > 0) {
> -                     /* yuck, all this snot is to setup iconv! */
> -                     tmplen = inptr - in - 3;
> -                     encname = g_alloca (tmplen + 1);
> -                     memcpy (encname, in + 2, tmplen);
> -                     encname[tmplen] = '\0';
>  
> -                     /* rfc2231 updates rfc2047 encoded words...
> -                      * The ABNF given in RFC 2047 for encoded-words
> is:
> -                      *   encoded-word := "=?" charset "?" encoding
> "?" encoded-text "?="
> -                      * This specification changes this ABNF to:
> -                      *   encoded-word := "=?" charset ["*" language]
> "?" encoding "?" encoded-text "?="
> -                      */
> +             inptr++;
>  
> -                     /* trim off the 'language' part if it's there...
> */
> -                     p = strchr (encname, '*');
> -                     if (p)
> -                             *p = '\0';
> +             /* charset */
> +             start = inptr;
> +             inptr = memchr (inptr, '?', inend-inptr);
> +             if (!inptr) {
> +                     return NULL;
> +             }
> 
=== message truncated ===



      ___________________________________________________________ 
雅虎邮箱传递新年祝福，个性贺卡送亲朋！ 
http://cn.mail.yahoo.com/gc/index.html?entry=5&souce=mail_mailletter_tagline
_______________________________________________
Evolution-hackers mailing list
Evolution-hackers@gnome.org
http://mail.gnome.org/mailman/listinfo/evolution-hackers

Re: [Evolution-hackers] [patch] fixed incorrect rfc2047 decode for CJK header

Reply via email to