On Tue, Mar 26, 2002 at 09:07:25AM +0900, Dan Kogai wrote:
> Encode hackers (Especially Autrijius)

Autrijus.

> * rename gb2312 to gb2312-raw, ksc5601 to ksc5601-raw
> * and alias gb2312 and ksc5601 to euc-(cn|kr)

I agree. :)

>   I know it's technically wrong but perl opts more for practical than 
> technical....

Well, at least almost every other program (hc, iconv, mozilla...) does
that anyway.

Also, please don't forget to apply the following patch to HZ.pm, which
does s/gb2312/gb2312-raw/ and also cleans up the code a little.

/Autrijus/

--- HZ.pm.old   Tue Mar 26 11:43:54 2002
+++ HZ.pm       Tue Mar 26 11:50:52 2002
@@ -1,13 +1,12 @@
 package Encode::CN::HZ;
 
 use strict;
-no warnings 'redefine'; # to quell the "use Encode" below
 
 use vars qw($VERSION);
 $VERSION = do { my @r = (q$Revision: 0.92 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
 
+use Encode ();
 use Encode::CN;
-use Encode qw|encode decode|;
 use base 'Encode::Encoding';
 
 # HZ is but escaped GB, so we implement it with the
@@ -20,7 +19,7 @@
 sub decode
 {
     my ($obj,$str,$chk) = @_;
-    my $gb = Encode::find_encoding('gb2312');
+    my $gb = Encode::find_encoding('gb2312-raw');
 
     $str =~ s{~                        # starting tilde
        (?:
@@ -44,7 +43,7 @@
            :
        (defined $2)    ? $gb->decode($2, $chk) # decode the characters
            :
-       ''                                      # '' on ~\n and invalid escape
+       ''                                      # ~\n and invalid escape = ''
     }egx;
 
     return $str;
@@ -54,38 +53,38 @@
 {
     my ($obj,$str,$chk) = @_;
     my ($out, $in_gb);
-    my $gb = Encode::find_encoding('gb2312');
+    my $gb = Encode::find_encoding('gb2312-raw');
 
     $str =~ s/~/~~/g;
 
-    # XXX: Since CHECK and partial decoding  has not been implemented yet,
+    # XXX: Since CHECK and partial decoding has not been implemented yet,
     #      we'll use a very crude way to test for GB2312ness.
 
     for my $index (0 .. length($str) - 1) {
        no warnings 'utf8';
 
        my $char = substr($str, $index, 1);
-       my $try  = $gb->encode($char);  # try encode this char
+       my $try  = $gb->encode($char);  # try to encode this character
 
-       if (defined($try)) {            # is a GB character
+       if (defined($try)) {            # is a GB character:
            if ($in_gb) {
-               $out .= $try;           # in GB mode - just append it
+               $out .= $try;           #  in GB mode - just append it
            }
            else {
-               $out .= "~{$try";       # enter GB mode, then append it
-               $in_gb = 1;
+               $in_gb = 1;             #  enter GB mode, then append it
+               $out .= "~{$try";
            }
-       }
+       }                               # not a GB character:
        elsif ($in_gb) {
-           $out .= "~}$char";          # leave GB mode, then append it
-           $in_gb = 0;
+           $in_gb = 0;                 #  leave GB mode, then append it
+           $out .= "~}$char";
        }
        else {
-           $out .= $char;              # not in GB mode - just append it
+           $out .= $char;              #  not in GB mode - just append it
        }
     }
 
-    $out .= '~}' if $in_gb;            # add closing brace as needed
+    $out .= '~}' if $in_gb;            # add closing brace if needed
 
     return $out;
 }

Attachment: msg00909/pgp00000.pgp
Description: PGP signature

Reply via email to