Change 23624 by [EMAIL PROTECTED] on 2004/12/07 18:26:48
Upgrade to Encode 2.09
Affected files ...
... //depot/perl/ext/Encode/Changes#71 edit
... //depot/perl/ext/Encode/Encode.pm#152 edit
... //depot/perl/ext/Encode/Encode.xs#108 edit
... //depot/perl/ext/Encode/META.yml#15 edit
... //depot/perl/ext/Encode/Unicode/Unicode.xs#21 edit
... //depot/perl/ext/Encode/t/Encode.t#10 edit
... //depot/perl/ext/Encode/t/fallback.t#6 edit
Differences ...
==== //depot/perl/ext/Encode/Changes#71 (text) ====
Index: perl/ext/Encode/Changes
--- perl/ext/Encode/Changes#70~23421~ Mon Oct 25 00:29:50 2004
+++ perl/ext/Encode/Changes Tue Dec 7 10:26:48 2004
@@ -1,14 +1,38 @@
# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 2.8 2004/10/24 13:00:29 dankogai Exp dankogai $
+# $Id: Changes,v 2.9 2004/12/03 19:16:53 dankogai Exp dankogai $
#
-$Revision: 2.8 $ $Date: 2004/10/24 13:00:29 $
+$Revision: 2.9 $ $Date: 2004/12/03 19:16:53 $
+! Encode.pm Encode.xs
+ Addressed " :encoding(utf8) broken in perl-5.8.6".
+ Message-Id: <[EMAIL PROTECTED]>
+! Encode.pm
+ Addressed "(de|en)code($valid_encoding, undef) does not warn".
+ http://rt.cpan.org/NoAuth/Bug.html?id=8723
+! Encode.pm t/Encode.t
+ Addressed "Can't encode URI". When a reference is fed to (en|de)code,
+ Encode now stringifies instead of returning undef.
+ http://rt.cpan.org/NoAuth/Bug.html?id=8725
+! Encode.xs t/fallback.t
+ Addressed "FB_HTMLCREF and FB_XMLCREF for the UTF-8 decoder".
+ http://rt.cpan.org/NoAuth/Bug.html?id=8694
+! Encode.pm
+ Addressed "s/digit/number/".
+ http://rt.cpan.org/NoAuth/Bug.html?id=8695
+! Encode.pm
+ Addressed "while (defined(read )) { ... } is an infinite loop".
+ http://rt.cpan.org/NoAuth/Bug.html?id=8696
+! Encode.pm
+ Addressed "What the heck is UCM?".
+ Document fixed so that it no longer contains "UCM-Based Encodings".
+ http://rt.cpan.org/NoAuth/Bug.html?id=8697
+
+2.08 2004/10/24 13:00:29
! Encode.xs lib/Encode/Encoding.pm Unicode/Unicode.{pm,xs}
- Resolved the issue that was raised by the Encode::utf8 fallbacks vs.
- PerlIO::encoding issue that was introduced in 2.07. This is done by
- making use of ->renew() method that used to be used only by
- Encode::Unicode. ->renewed() method was also introduced to fetch
- the value thereof.
+ Resolved the issue that was raised by 2.07 -- the Encode::utf8 fallbacks
+ that were introduced there messed up PerlIO::encoding.
+ * To do so, ->renew() is renewed and ->renewed() was introduced to
+ tell whether the caller is PerlIO or not.
Message-Id: <[EMAIL PROTECTED]>
2.07 2004/10/22 19:35:52
==== //depot/perl/ext/Encode/Encode.pm#152 (text) ====
Index: perl/ext/Encode/Encode.pm
--- perl/ext/Encode/Encode.pm#151~23421~ Mon Oct 25 00:29:50 2004
+++ perl/ext/Encode/Encode.pm Tue Dec 7 10:26:48 2004
@@ -1,9 +1,9 @@
#
-# $Id: Encode.pm,v 2.8 2004/10/24 12:32:06 dankogai Exp $
+# $Id: Encode.pm,v 2.9 2004/12/03 19:16:40 dankogai Exp $
#
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 2.8 $ =~ /\d+/g); sprintf
"%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.9 $ =~ /\d+/g); sprintf
"%d."."%02d" x $#r, @r };
sub DEBUG () { 0 }
use XSLoader ();
XSLoader::load(__PACKAGE__, $VERSION);
@@ -140,7 +140,7 @@
{
my ($name, $string, $check) = @_;
return undef unless defined $string;
- return undef if ref $string;
+ $string .= '' if ref $string; # stringify;
$check ||=0;
my $enc = find_encoding($name);
unless(defined $enc){
@@ -156,7 +156,7 @@
{
my ($name,$octets,$check) = @_;
return undef unless defined $octets;
- return undef if ref $octets;
+ $octets .= '' if ref $octets;
$check ||=0;
my $enc = find_encoding($name);
unless(defined $enc){
@@ -401,9 +401,7 @@
the result is always off, even when it contains completely valid utf8
string. See L</"The UTF-8 flag"> below.
-encode($valid_encoding, undef) is harmless but warns you for
-C<Use of uninitialized value in subroutine entry>.
-encode($valid_encoding, '') is harmless and warnless.
+If the $string is C<undef> then C<undef> is returned; a reference is stringified first.
=item $string = decode(ENCODING, $octets [, CHECK])
@@ -423,9 +421,7 @@
ASCII data (or EBCDIC on EBCDIC machines). See L</"The UTF-8 flag">
below.
-decode($valid_encoding, undef) is harmless but warns you for
-C<Use of uninitialized value in subroutine entry>.
-decode($valid_encoding, '') is harmless and warnless.
+If $octets is C<undef> then C<undef> is returned; a reference is stringified first.
=item [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK])
@@ -578,10 +574,10 @@
=item I<CHECK> = Encode::FB_DEFAULT ( == 0)
If I<CHECK> is 0, (en|de)code will put a I<substitution character> in
-place of a malformed character. When you encode to UCM-based encodings,
-E<lt>subcharE<gt> will be used. When you decode from UCM-based
-encodings, the code point C<0xFFFD> is used. If the data is supposed
-to be UTF-8, an optional lexical warning (category utf8) is given.
+place of a malformed character. When you encode, E<lt>subcharE<gt>
+will be used. When you decode, the code point C<0xFFFD> is used. If
+the data is supposed to be UTF-8, an optional lexical warning
+(category utf8) is given.
=item I<CHECK> = Encode::FB_CROAK ( == 1)
@@ -600,12 +596,10 @@
(i.e. you are reading with a fixed-width buffer). Here is a sample
code that does exactly this:
- my $data = ''; my $utf8 = '';
- while(defined(read $fh, $buffer, 256)){
- # buffer may end in a partial character so we append
- $data .= $buffer;
- $utf8 .= decode($encoding, $data, Encode::FB_QUIET);
- # $data now contains the unprocessed partial character
+ my $buffer = ''; my $string = '';
+ while(read $fh, $buffer, 256, length($buffer)){
+ $string .= decode($encoding, $buffer, Encode::FB_QUIET);
+ # $buffer now contains the unprocessed partial character
}
=item I<CHECK> = Encode::FB_WARN
@@ -629,8 +623,8 @@
in the character repertoire of the encoding.
HTML/XML character reference modes are about the same, in place of
-C<\x{I<HHHH>}>, HTML uses C<&#I<NNNN>;> where I<NNNN> is a decimal digit and
-XML uses C<&#xI<HHHH>;> where I<HHHH> is the hexadecimal digit.
+C<\x{I<HHHH>}>, HTML uses C<&#I<NNN>;> where I<NNN> is a decimal number and
+XML uses C<&#xI<HHHH>;> where I<HHHH> is a hexadecimal number.
=item The bitmask
==== //depot/perl/ext/Encode/Encode.xs#108 (text) ====
Index: perl/ext/Encode/Encode.xs
--- perl/ext/Encode/Encode.xs#107~23421~ Mon Oct 25 00:29:50 2004
+++ perl/ext/Encode/Encode.xs Tue Dec 7 10:26:48 2004
@@ -1,5 +1,5 @@
/*
- $Id: Encode.xs,v 2.2 2004/10/24 13:00:29 dankogai Exp dankogai $
+ $Id: Encode.xs,v 2.3 2004/12/03 19:16:53 dankogai Exp dankogai $
*/
#define PERL_NO_GET_CONTEXT
@@ -279,7 +279,6 @@
#if 0
fprintf(stderr, "renewed == %d\n", renewed);
#endif
- if (renewed){ check |= ENCODE_RETURN_ON_ERR; }
}
FREETMPS; LEAVE;
/* end PerlIO check */
@@ -302,6 +301,8 @@
U8 skip = UTF8SKIP(s);
if ((s + skip) > e) {
/* Partial character - done */
+ if (renewed)
+ break;
goto decode_utf8_fallback;
}
else if (is_utf8_char(s)) {
@@ -331,7 +332,9 @@
break;
}
if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
- SV* subchar = newSVpvf("\\x%02" UVXf, (UV)*s);
+ SV* subchar = newSVpvf(check & ENCODE_PERLQQ ? "\\x%02" UVXf :
+ check & ENCODE_HTMLCREF ? "&#%" UVuf ";" :
+ "&#x%" UVxf ";", (UV)*s);
sv_catsv(dst, subchar);
SvREFCNT_dec(subchar);
} else {
==== //depot/perl/ext/Encode/META.yml#15 (text) ====
Index: perl/ext/Encode/META.yml
--- perl/ext/Encode/META.yml#14~23421~ Mon Oct 25 00:29:50 2004
+++ perl/ext/Encode/META.yml Tue Dec 7 10:26:48 2004
@@ -1,7 +1,7 @@
# http://module-build.sourceforge.net/META-spec.html
#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
name: Encode
-version: 2.08
+version: 2.09
version_from: Encode.pm
installdirs: perl
requires:
==== //depot/perl/ext/Encode/Unicode/Unicode.xs#21 (text) ====
Index: perl/ext/Encode/Unicode/Unicode.xs
--- perl/ext/Encode/Unicode/Unicode.xs#20~23421~ Mon Oct 25 00:29:50 2004
+++ perl/ext/Encode/Unicode/Unicode.xs Tue Dec 7 10:26:48 2004
@@ -1,5 +1,5 @@
/*
- $Id: Unicode.xs,v 2.1 2004/10/24 13:00:29 dankogai Exp dankogai $
+ $Id: Unicode.xs,v 2.1 2004/10/24 13:00:29 dankogai Exp $
*/
#define PERL_NO_GET_CONTEXT
==== //depot/perl/ext/Encode/t/Encode.t#10 (text) ====
Index: perl/ext/Encode/t/Encode.t
--- perl/ext/Encode/t/Encode.t#9~23417~ Sat Oct 23 12:58:07 2004
+++ perl/ext/Encode/t/Encode.t Tue Dec 7 10:26:48 2004
@@ -143,6 +143,9 @@
ok( is_utf8($a)); # weird but true: an empty UTF-8 string
# non-string arguments
-ok(decode(latin1 => bless {}, "x"), undef);
-ok(encode(utf8 => bless {}, "x"), undef);
-
+package Encode::Dummy;
+use overload q("") => sub { $_[0]->[0] };
+sub new { my $class = shift; bless [ @_ ] => $class }
+package main;
+ok(decode(latin1 => Encode::Dummy->new("foobar")), "foobar");
+ok(encode(utf8 => Encode::Dummy->new("foobar")), "foobar");
==== //depot/perl/ext/Encode/t/fallback.t#6 (text) ====
Index: perl/ext/Encode/t/fallback.t
--- perl/ext/Encode/t/fallback.t#5~23417~ Sat Oct 23 12:58:07 2004
+++ perl/ext/Encode/t/fallback.t Tue Dec 7 10:26:48 2004
@@ -17,7 +17,7 @@
use strict;
#use Test::More qw(no_plan);
-use Test::More tests => 36;
+use Test::More tests => 40;
use Encode q(:all);
my $uo = '';
@@ -137,17 +137,17 @@
is($dst, $ah, "FB_HTMLCREF ascii");
is($src, '', "FB_HTMLCREF residue ascii");
-#$src = $ao;
-#$dst = $utf8->decode($src, FB_HTMLCREF);
-#is($dst, $uh, "FB_HTMLCREF utf8");
-#is($src, '', "FB_HTMLCREF residue utf8");
+$src = $ao;
+$dst = $utf8->decode($src, FB_HTMLCREF);
+is($dst, $uh, "FB_HTMLCREF utf8");
+is($src, '', "FB_HTMLCREF residue utf8");
$src = $uo;
$dst = $ascii->encode($src, FB_XMLCREF);
is($dst, $ax, "FB_XMLCREF ascii");
is($src, '', "FB_XMLCREF residue ascii");
-#$src = $ao;
-#$dst = $utf8->decode($src, FB_XMLCREF);
-#is($dst, $ax, "FB_XMLCREF utf8");
-#is($src, '', "FB_XMLCREF residue utf8");
+$src = $ao;
+$dst = $utf8->decode($src, FB_XMLCREF);
+is($dst, $ax, "FB_XMLCREF utf8");
+is($src, '', "FB_XMLCREF residue utf8");
End of Patch.