Dan Kogai <[EMAIL PROTECTED]> wrote:
> Perl Encode Hackers,
> Since Encode/JP/JIS.pm and Encode/JP/ISO_2022_JP are already coded to
> handle jisx0212 (if euc-jp supports that), it automagically adds
> jisx0212 support to other encodings as well.
> I need to fix pod and t/JP.t so it tests 0212 part but I will upload
> new Encode package within 24 hours.
> Thank you Nick for making compile this smart!
>
> Dan the Man with a New Encoding
Excellent!
Here is a patch to remove Encode::Tcl::Extended.
(Some corrections to Encode::Tcl::Escape and Encode::Tcl::Table are also included.)
Please remove the following files as well:
-Encode/lib/Encode/Tcl/Extended.pm
-Encode/Encode/euc-jp-0212.enc
IMO, Tcl's euc-jp.enc, which lacks JIS X 0212, deserves to be removed;
or is it worth retaining under a different name?
(For example, Java seems to have EUC_JP_LINUX for EUC-JP without JIS X 0212.)
http://java.sun.com/j2se/1.3/docs/guide/intl/encoding.doc.html
Either way, it would be better to remove the euc-jp test
from Tcl.t.
### Patch starts from here
diff -urN Encode~/lib/Encode/Tcl/Escape.pm Encode/lib/Encode/Tcl/Escape.pm
--- Encode~/lib/Encode/Tcl/Escape.pm Tue Mar 12 04:56:38 2002
+++ Encode/lib/Encode/Tcl/Escape.pm Tue Mar 19 23:20:26 2002
@@ -31,7 +31,7 @@
$val =~ /[\x30-\x3F]$/ ? 2 : # (only 2 is supported)
$val =~ /[\x40-\x5F]$/ ? 2 : # double byte
$val =~ /[\x60-\x6F]$/ ? 3 : # triple byte
- $val =~ /[\x70-\x7F]$/ ? 4 :
+ $val =~ /[\x70-\x7E]$/ ? 4 :
# 4 or more (only 4 is supported)
croak("odd sequence is defined");
@@ -96,8 +96,8 @@
}
else
{
- # strictly, ([\x20-\x2F]*[\x30-\x7E]). '?' for chopped.
- $str =~ s/^([\x20-\x2F]*[\x30-\x7E]?)//;
+ # strictly, ([\x21-\x2F]*[\x30-\x7E]). '?' for chopped.
+ $str =~ s/^([\x21-\x2F]*[\x30-\x7E]?)//;
if ($chk && ! length $str)
{
$str = "\e$1"; # split sequence
@@ -215,7 +215,7 @@
SINGLE SHIFT TWO (SS2) ESC 04/14
SINGLE SHIFT THREE (SS3) ESC 04/15
-Designation of control character sets are not supported.
+Designation of control character sets is not supported.
=head1 SEE ALSO
diff -urN Encode~/lib/Encode/Tcl/Table.pm Encode/lib/Encode/Tcl/Table.pm
--- Encode~/lib/Encode/Tcl/Table.pm Tue Mar 12 04:56:38 2002
+++ Encode/lib/Encode/Tcl/Table.pm Tue Mar 19 23:10:16 2002
@@ -152,7 +152,7 @@
and handles types S, D, and M of Tcl encodings.
Implementation for type M is restricted to encodings
-in which bytes per a character is up to 2.
+in which the number of bytes per character is up to 2.
=head1 SEE ALSO
diff -urN Encode~/t/Tcl.t Encode/t/Tcl.t
--- Encode~/t/Tcl.t Tue Mar 12 13:57:34 2002
+++ Encode/t/Tcl.t Tue Mar 19 23:59:48 2002
@@ -15,12 +15,11 @@
use Encode qw(encode decode);
use Encode::Tcl;
-my @encodings = qw(euc-cn euc-jp euc-kr big5 shiftjis); # CJK
+my @encodings = qw(euc-cn euc-kr big5 shiftjis); # CJK
my $n = 2;
my %greek = (
'euc-cn' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
- 'euc-jp' => [0xA6A1..0xA6B8,0xA6C1..0xA6D8],
'euc-kr' => [0xA5C1..0xA5D8,0xA5E1..0xA5F8],
'big5' => [0xA344..0xA35B,0xA35C..0xA373],
'shiftjis' => [0x839F..0x83B6,0x83BF..0x83D6],
@@ -37,7 +36,6 @@
my %ideodigit = ( # cjk ideograph 'one' to 'ten'
'euc-cn' => [qw(d2bb b6fe c8fd cbc4 cee5 c1f9 c6df b0cb bec5 caae)],
- 'euc-jp' => [qw(b0ec c6f3 bbb0 bbcd b8de cfbb bcb7 c8ac b6e5 bdbd)],
'euc-kr' => [qw(ece9 eca3 dfb2 decc e7e9 d7bf f6d2 f8a2 cefa e4a8)],
'big5' => [qw(a440 a447 a454 a57c a4ad a4bb a443 a44b a445 a451)],
'shiftjis' => [qw(88ea 93f1 8e4f 8e6c 8cdc 985a 8eb5 94aa 8be3 8f5c)],
@@ -45,23 +43,9 @@
);
my @ideodigit = qw(one two three four five six seven eight nine ten);
-my $jis = '7bit-jis';
my $kr = '2022-kr';
my %esc_str;
-$esc_str{$jis} = {qw(
- 1b24422422242424262428242a1b2842
- 3042304430463048304a
- 1b284931323334355d1b2842
- ff71ff72ff73ff74ff75ff9d
- 1b2442467c4b5c1b2842
- 65e5672c
- 3132331b244234413b7a1b28425065726c
- 0031003200336f225b57005000650072006c
- 546573740a1b24422546253925481b28420a
- 0054006500730074000a30c630b930c8000a
-)};
-
$esc_str{$kr} = {qw(
1b2429430e2a22213e0f410d0a
304200b10041000d000a
@@ -84,24 +68,15 @@
my @ary_buff = ( # [ encoding, decoded, encoded ]
# type-M
["euc-cn", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
- ["euc-jp", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
- ["euc-jp", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
["euc-kr", hiragana, "\xAA\xA2\xAA\xA4\xAA\xA6\xAA\xA8\xAA\xAA" ],
["shiftjis", hiragana, "\x82\xA0\x82\xA2\x82\xA4\x82\xA6\x82\xA8" ],
["shiftjis", han_kana, "\xB1\xB2\xB3\xB4\xB5" ],
# type-E
["2022-cn", hiragana, "\e\$)A\cN". '$"$$$&$($*' . "\cO" ],
- ["2022-jp", hiragana, "\e\$B".'$"$$$&$($*'."\e(B" ],
["2022-kr", hiragana, "\e\$)C\cN". '*"*$*&*(**' . "\cO" ],
-# [ $jis, han_kana, "\e\(I".'12345'."\e(B" ],
["2022-jp1", macron, "\e\$(D\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B"],
["2022-jp2", "\x{C0}" . macron . "\x{C1}",
"\e\$(D\e.A\eN\x40\x2A\x27\x2A\x37\x2A\x45\x2A\x57\x2A\x69\e(B\eN\x41"],
-# type-X
- ["euc-jp-0212", hiragana, "\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA8\xA4\xAA" ],
- ["euc-jp-0212", han_kana, "\x8E\xB1\x8E\xB2\x8E\xB3\x8E\xB4\x8E\xB5" ],
- ["euc-jp-0212", macron,
- "\x8F\xAA\xA7\x8F\xAA\xB7\x8F\xAA\xC5\x8F\xAA\xD7\x8F\xAA\xE9" ],
);
plan test => $n*@encodings + $n*@encodings*@greek
#End of Patch
Sincerely,
SADAHIRO Tomoyuki