Update of /cvsroot/mhonarc/mhonarc/MHonArc/lib
In directory subversions:/tmp/cvs-serv26846

Modified Files:
        mhtxtenrich.pl mhtxthtml.pl mhtxtplain.pl 
Log Message:
* Text filters now use the x-mha-charset field to get the charset of
  the body.
* mhtxtenrich.pl and mhtxthtml.pl now use CHARSETCONVERTERS to
  process character data in the same manner as mhtxtplain.pl.  This
  helps normalize character data to ASCII plus entity references.


Index: mhtxtenrich.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxtenrich.pl,v
retrieving revision 2.6
retrieving revision 2.7
diff -C2 -r2.6 -r2.7
*** mhtxtenrich.pl      27 Oct 2002 03:08:17 -0000      2.6
--- mhtxtenrich.pl      18 Dec 2002 05:47:46 -0000      2.7
***************
*** 37,40 ****
--- 37,45 ----
  package m2h_text_enriched;
  
+ my %special_to_char = (
+     'lt'  => '<',
+     'gt'  => '>',
+ );
+ 
  ##---------------------------------------------------------------------------
  ##    Filter routine.
***************
*** 44,62 ****
  sub filter {
      my($fields, $data, $isdecode, $args) = @_;
!     my($innofill, $chunk, $ret, $charset);
!     $ret  = "";
!     $args = ""  unless defined($args);
!     $charset = "";
! 
!     ## Grab charset parameter (if defined)
!     if ((defined($fields->{'content-type'}[0])) &&
!       ($fields->{'content-type'}[0] =~ /\bcharset\s*=\s*([^\s;]+)/i) ) {
!       $charset = lc $1;
!       $charset =~ s/['";\s]//g;
      }
  
      ## Convert specials
!     $$data =~ s|&|\&amp;|gi;
!     $$data =~ s|<<|\&lt;|gi;
  
      ## Translate text/enriched commands
--- 49,76 ----
  sub filter {
      my($fields, $data, $isdecode, $args) = @_;
!     my($innofill, $chunk);
!     my $charset = $fields->{'x-mha-charset'};
!     my($charcnv, $real_charset_name) =
!           readmail::MAILload_charset_converter($charset);
!     my $ret = "";
!     $args   = ""  unless defined($args);
! 
!     if (defined($charcnv) && defined(&$charcnv)) {
!       $$data = &$charcnv($$data, $real_charset_name);
!     } else {
!       warn qq/\n/,
!            qq/Warning: Unrecognized character set: $charset\n/,
!            qq/         Message-Id: <$mhonarc::MHAmsgid>\n/,
!            qq/         Message Number: $mhonarc::MHAmsgnum\n/;
      }
+     ## Fixup any EOL mess
+     $$data =~ s/\r?\n/\n/g;
+     $$data =~ s/\r/\n/g;
+ 
+     # translate back <>'s for tag processing
+     $$data =~ s/&([lg]t);/$special_to_char{$1}/g;
  
      ## Convert specials
!     $$data =~ s/<</\&lt;/g;
  
      ## Translate text/enriched commands
***************
*** 75,90 ****
        convert_tags(\$chunk);
        if (!$innofill) {
!           $chunk =~ s|(\r?\n\s*)|&nl_seq_to_brs($1)|gie;
        }
        $ret .= $chunk;
      }
- 
-     ## Translate 8-bit characters to entity refs based on charset
-     ##                (we already did '<' and '&' characters)
-     if ($charset =~ /iso-8859-([2-9]|10)/i) {
-       require 'iso8859.pl';
-       $ret = iso_8859::str2sgml($ret, $charset, 1);
-     }
- 
      $ret;
  }
--- 89,96 ----
        convert_tags(\$chunk);
        if (!$innofill) {
!           $chunk =~ s/(\n\s*)/&nl_seq_to_brs($1)/ge;
        }
        $ret .= $chunk;
      }
      $ret;
  }
***************
*** 122,127 ****
  
      # Not supported commands
!     $$str =~ s|<lang>\s*<param>([^<]*)</param>||gi;
!     $$str =~ s|</lang>||gi;
  }
  
--- 128,133 ----
  
      # Not supported commands
!     $$str =~ s|<lang>\s*<param>([^<]*)</param>|<div lang="$1">|gi;
!     $$str =~ s|</lang>|</div>|gi;
  }
  

Index: mhtxthtml.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxthtml.pl,v
retrieving revision 2.27
retrieving revision 2.28
diff -C2 -r2.27 -r2.28
*** mhtxthtml.pl        13 Dec 2002 07:19:01 -0000      2.27
--- mhtxthtml.pl        18 Dec 2002 05:47:46 -0000      2.28
***************
*** 50,53 ****
--- 50,60 ----
                 q/dynsrc|for|href|longdesc|profile|src|url|usemap)\b/;
  
+ my %special_to_char = (
+     'lt'    => '<',
+     'gt'    => '>',
+     'amp'   => '&',
+     'quot'  => '"',
+ );
+ 
  ##---------------------------------------------------------------------------
  ##    The filter must modify HTML content parts for merging into the
***************
*** 122,125 ****
--- 129,146 ----
      my $atdir    = $subdir ? $mhonarc::MsgPrefix.$mhonarc::MHAmsgnum : "";
      my $tmp;
+ 
+     my $charset = $fields->{'x-mha-charset'};
+     my($charcnv, $real_charset_name) =
+           readmail::MAILload_charset_converter($charset);
+     if (defined($charcnv) && defined(&$charcnv)) {
+       $$data = &$charcnv($$data, $real_charset_name);
+     } else {
+       warn qq/\n/,
+            qq/Warning: Unrecognized character set: $charset\n/,
+            qq/         Message-Id: <$mhonarc::MHAmsgid>\n/,
+            qq/         Message Number: $mhonarc::MHAmsgnum\n/;
+     }
+     # translate HTML special entities back to their literal characters
+     $$data =~ s/&([lg]t|amp|quot);/$special_to_char{$1}/g;
  
      ## Check comment declarations: may screw-up mhonarc processing

Index: mhtxtplain.pl
===================================================================
RCS file: /cvsroot/mhonarc/mhonarc/MHonArc/lib/mhtxtplain.pl,v
retrieving revision 2.33
retrieving revision 2.34
diff -C2 -r2.33 -r2.34
*** mhtxtplain.pl       9 Dec 2002 23:21:52 -0000       2.33
--- mhtxtplain.pl       18 Dec 2002 05:47:46 -0000      2.34
***************
*** 72,77 ****
  ##                    with a link to it from the message page.
  ##
- ##    default=set     Default charset to use if not set.
- ##
  ##    disableflowed
  ##                    Ignore flowed formatting for message text
--- 72,75 ----
***************
*** 243,247 ****
  
      my($charset, $nourl, $igncharset, $nonfixed,
!        $keepspace, $maxwidth, $target, $defset, $xhtml);
      my(%asis) = ( );
  
--- 241,245 ----
  
      my($charset, $nourl, $igncharset, $nonfixed,
!        $keepspace, $maxwidth, $target, $xhtml);
      my(%asis) = ( );
  
***************
*** 251,256 ****
      if ($args =~ /\bmaxwidth=(\d+)/i) { $maxwidth = $1; }
        else { $maxwidth = 0; }
-     if ($args =~ /\bdefault=(\S+)/i) { $defset = lc $1; }
-       else { $defset = 'us-ascii'; }
      $target = "";
      if ($args =~ /\btarget="([^"]+)"/i) { $target = $1; }
--- 249,252 ----
***************
*** 260,273 ****
        $target = qq/target="$target"/;
      }
-     $defset =~ s/['"\s]//g;
  
!     ## Grab charset parameter (if defined)
!     if ( (defined($fields->{'content-type'}[0])) &&
!        ($fields->{'content-type'}[0] =~ /\bcharset\s*=\s*([^\s;]+)/i) ) {
!       $charset = lc $1;
!       $charset =~ s/['";\s]//g;
!     } else {
!       $charset = $defset;
!     }
      ## Grab format parameter (if defined)
      my $textformat = 'fixed';
--- 256,263 ----
        $target = qq/target="$target"/;
      }
  
!     ## Grab charset parameter
!     $charset = $fields->{'x-mha-charset'};
! 
      ## Grab format parameter (if defined)
      my $textformat = 'fixed';

---------------------------------------------------------------------
To sign off from this list, send email to [EMAIL PROTECTED] with the
message text UNSUBSCRIBE MHONARC-DEV

Reply via email to