Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib
In directory subversions:/tmp/cvs-serv26472

Modified Files:
        readmail.pl 
Log Message:
* Added hooks for text entity character encoding support.
* Define a special x-mha-charset entity header field for text entities.
  Text-based MIMEFILTERS should now use it to determine the charset
  of the entity body.


Index: readmail.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/readmail.pl,v
retrieving revision 2.24
retrieving revision 2.25
diff -C2 -r2.24 -r2.25
*** readmail.pl 17 Nov 2002 03:38:52 -0000      2.24
--- readmail.pl 18 Dec 2002 05:43:48 -0000      2.25
***************
*** 209,212 ****
--- 209,223 ----
  
  ##---------------------------------------------------------------------------
+ 
+ ## Charset handed to $TextEncoderFunc; a false value disables conversion.
+ $TextEncode = undef  unless defined($TextEncode);
+ ## Encoder sub, called as func($body, $charset, $TextEncode).  The
+ ## guard must test this same variable or a caller's value is erased.
+ $TextEncoderFunc = undef  unless defined($TextEncoderFunc);
+ ## Charset assumed when a text entity does not declare one.
+ $TextDefCharset = 'us-ascii'
+     unless defined($TextDefCharset);
+ 
+ ##---------------------------------------------------------------------------
  ##    Variables holding functions for generating processed output
  ##    for MAILread_body().  The default functions generate HTML.
***************
*** 442,445 ****
--- 453,468 ----
      $uribase =~ s|(.*/).*|$1|  if $uribase;
  
+     ## Convert text encoding; charset is kept in 'x-mha-charset' field
+     if ($type eq 'text') {
+       my $charset = extract_charset($content, $subtype, $body);
+       $fields->{'x-mha-charset'} = $charset;  # charset from extract_charset()
+       if ($TextEncode &&
+               defined($TextEncoderFunc) &&
+               defined(&$TextEncoderFunc) &&   # the encoder sub really exists
+               &$TextEncoderFunc($body, $charset, $TextEncode)) {
+           $fields->{'x-mha-charset'} = $TextEncode;  # encoder returned true
+       }
+     }
+ 
      ## Load content-type filter
      if ( (!defined($filter = &load_filter($ctype)) || !defined(&$filter)) &&
***************
*** 1142,1145 ****
--- 1165,1193 ----
      }
      $ret;
+ }
+ ##---------------------------------------------------------------------------##
+ 
+ sub extract_charset {
+     ## Returns lowercased charset of a text entity ($TextDefCharset if
+     ## none found).  $body may be the text or a reference to the text.
+     my($content, $subtype, $body) = @_;  # Content-type, sub-type, body
+     my $text    = ref($body) ? $body : \$body;
+     my $charset = $TextDefCharset;
+ 
+     if ($content =~ /\bcharset\s*=\s*([^\s;]+)/i) {
+         $charset =  lc $1;
+         $charset =~ s/['";\s]//g;
+     }
+ 
+     # If HTML, check <meta http-equiv=content-type> tag since it
+     # can be different than what is specified in the entity header.
+     # The text itself ($$text) is matched, not the stringified reference.
+     if (($subtype eq 'html' || $subtype eq 'x-html') &&
+         ($$text =~ m/(<meta\s+http-equiv\s*=\s*['"]?
+                      content-type\b[^>]*>)/xi)) {
+         my $meta = $1;
+         $charset = lc $1  if $meta =~ m/\bcharset\s*=\s*['"]?([\w\.\-]+)/i;
+     }
+     $charset;
  }
  

---------------------------------------------------------------------
To sign-off this list, send email to [EMAIL PROTECTED] with the
message text UNSUBSCRIBE MHONARC-DEV

Reply via email to