Hello, here is a patch for Encode.pm to use escape-sequence encoding. Known problems: (1) For present, any compiled encodings (ASCII, ISO-8859-*, etc.) are not available for the code extension of escape-sequence encoding. (2) encodings with SINGLE SHIFTs (SS2, SS3) are not avaliable. Modification: (1) iso2022-jp.enc and iso2022-kr.enc may contain the GR characters ("\xA0" .. "\xFF"). According to RFC1554 (ISO-2022-JP-2) and RFC1557 (Korean Character Encoding for Internet Messages), they must be in 7 bit format. So, the following files are added. 7bit.enc (ASCII, not including ESC, SI, SO) 7bit-jis.enc 7bit-kana.enc 7bit-kr.enc (these names might be not so good... please comment and/or tell better names) (2) A new parameter, 'standard'. It means the escape sequence omitted at the beginning of the string and added at the end of the string if neccessary (but not always. if the last character is an ASCII, the final \x1b(B is not appended). (ex.) # Encoding file: 7bit-jis, escape-driven E name 7bit-jis init {} final {} standard \x1b(B 7bit \x1b(B 7bit \x1b(J (snip..) ============== diff -Pur Encode.orig/7bit-jis.enc Encode/7bit-jis.enc --- Encode.orig/7bit-jis.enc Thu Jan 01 09:00:00 1970 +++ Encode/7bit-jis.enc Sat Jun 30 05:55:08 2001 @@ -0,0 +1,13 @@ +# Encoding file: 7bit-jis, escape-driven +E +name 7bit-jis +init {} +final {} +standard \x1b(B +7bit \x1b(B +7bit \x1b(J +7bit-kana \x1b(I +jis0208 \x1b$B +jis0208 \x1b$@ +jis0208 \x1b&@\x1b$B +jis0212 \x1b$(D diff -Pur Encode.orig/7bit-kana.enc Encode/7bit-kana.enc --- Encode.orig/7bit-kana.enc Thu Jan 01 09:00:00 1970 +++ Encode/7bit-kana.enc Sat Jun 30 07:21:10 2001 @@ -0,0 +1,20 @@ +# Encoding file: 7bit-kana, single-byte +S +0025 0 1 +00 +0000000100020003000400050006000700080009000A000B000C000D00000000 +0010001100120013001400150016001700180019001A001B001C001D0000001F +0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F +FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F +FF80FF81FF82FF83FF84FF85FF86FF87FF88FF89FF8AFF8BFF8CFF8DFF8EFF8F +FF90FF91FF92FF93FF94FF95FF96FF97FF98FF99FF9AFF9BFF9CFF9DFF9EFF9F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 diff -Pur Encode.orig/7bit-kr.enc Encode/7bit-kr.enc --- Encode.orig/7bit-kr.enc Thu Jan 01 09:00:00 1970 +++ Encode/7bit-kr.enc Sat Jun 30 05:54:52 2001 @@ -0,0 +1,7 @@ +# Encoding file: 7bit-kr, escape-driven +E +name 7bit-kr +init \x1b$)C +final {} +7bit \x0f +ksc5601 \x0e diff -Pur Encode.orig/7bit.enc Encode/7bit.enc --- Encode.orig/7bit.enc Thu Jan 01 09:00:00 1970 +++ Encode/7bit.enc Sat Jun 30 06:59:28 2001 @@ -0,0 +1,20 @@ +# Encoding file: 7bit (ASCII for E encodings), single-byte +S +003F 0 1 +00 +0000000100020003000400050006000700080009000A000B000C000D00000000 +0010001100120013001400150016001700180019001A001B001C001D0000001F +0020002100220023002400250026002700280029002A002B002C002D002E002F +0030003100320033003400350036003700380039003A003B003C003D003E003F +0040004100420043004400450046004700480049004A004B004C004D004E004F +0050005100520053005400550056005700580059005A005B005C005D005E005F +0060006100620063006400650066006700680069006A006B006C006D006E006F +0070007100720073007400750076007700780079007A007B007C007D007E007F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 diff -Pur Encode.orig/Tcl.pm Encode/Tcl.pm --- Encode.orig/Tcl.pm Tue Jun 26 22:26:56 2001 +++ Encode/Tcl.pm Sat Jun 30 07:27:46 2001 @@ -229,27 +229,115 @@ sub read { - my ($class,$fh,$name) = @_; - my %self = (Name => $name, Num => 0); + my ($obj,$fh,$name) = @_; + my(%tbl, @esc, $enc); while (<$fh>) { my ($key,$val) = /^(\S+)\s+(.*)$/; $val =~ s/^\{(.*?)\}/$1/g; $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge; - $self{$key} = $val; + if($enc = Encode->getEncoding($key)){ + $tbl{$val} = ref($enc) eq 'Encode::Tcl' ? $enc->loadEncoding : $enc; + push @esc, $val; + }else{ + $obj->{$key} = $val; + } } - return bless \%self,$class; + $obj->{'Ctl'} = \@esc; + $obj->{'Tbl'} = \%tbl; + return $obj; } sub decode { - croak("Not implemented yet"); + my ($obj,$str,$chk) = @_; + my $tbl = $obj->{'Tbl'}; + my $ctl = $obj->{'Ctl'}; + my $ini = $obj->{'init'}; + my $fin = $obj->{'final'}; + my $std = $ctl->[0]; + my $cur = $std; + my $uni = ''; + while (length($str)){ + my $uch = substr($str,0,1,''); + if($uch eq "\e"){ + $str =~ s/^([\x20-\x2F]*[\x30-\x7E](?:\x1b[\x20-\x2F]*[\x30-\x7E])*)//; + my $esc = "\e$1"; + if($tbl->{$esc}){ $cur = $esc } + elsif($esc eq $ini || $esc eq $fin){ $cur = $std } + else{carp "unknown escape sequence" } + next; + } + if($uch eq "\x0e" || $uch eq "\x0f"){ + $cur = $uch and next; + } + my $x; + my $ch = ord($uch); + my $rep = $tbl->{$cur}->{'Rep'}; + my $touni = $tbl->{$cur}->{'ToUni'}; + if (&$rep($ch) eq 'C') + { + $x = $touni->[0][$ch]; + } + else + { + $x = $touni->[$ch][ord(substr($str,0,1,''))]; + } + unless (defined $x) + { + last if $chk; + # What do we do here ? + $x = ''; + } + $uni .= $x; + } + $_[1] = $str if $chk; + return $uni; } sub encode { - croak("Not implemented yet"); -} + my ($obj,$uni,$chk) = @_; + my $tbl = $obj->{'Tbl'}; + my $ctl = $obj->{'Ctl'}; + my $ini = $obj->{'init'}; + my $fin = $obj->{'final'}; + my $std = $obj->{'standard'} || ''; + my $str = $ini; + my $pre = $std; + my $cur = $pre; + while (length($uni)){ + my $ch = chr(ord(substr($uni,0,1,''))); + my $x = $tbl->{$pre}->{FmUni}->{$ch}; + unless(defined $x){ + foreach my $esc (@$ctl){ + $x = $tbl->{$esc}->{FmUni}->{$ch}; + $cur = $esc and last if defined $x; + } + } + if($x == 0x0d && !($ini eq '' && $fin eq '') && substr($uni,0,1) eq "\x0a") + { + $str .= $cur unless $cur eq $pre; + $str .= $fin."\x0d\x0a".$ini; + substr($uni,0,1,''); + $pre = $std; + next; + } + my $def = $tbl->{$cur}->{'Def'}; + my $rep = $tbl->{$cur}->{'Rep'}; + unless (defined $x){ + last if ($chk); + $x = $def; + } + $str .= $cur unless $cur eq $pre; + $str .= pack(&$rep($x),$x); + $pre = $cur; + } + $str .= $std unless $cur eq $std; + $str .= $fin; + $_[1] = $uni if $chk; + return $str; +} 1; __END__ regards, SADAHIRO Tomoyuki E-mail: [EMAIL PROTECTED] URL: http://homepage1.nifty.com/nomenclator/perl/