Seems to be getting worse. I used the 5.6.0 stable.tar.gz to build
and applied the patch. The perl test cases still run, my DBD::Oracle
test are failier.
Since I have some spare time at the moment, I'd like to track
those things down. (I love Perl and I love having unicode
in there.)
Browsing through sv.c, pp.c and friends, what is the design
philosophy for using SVf_UTF8? join upgrades to utf8, but should eq
do the same?
As far as I can see, Perl <5.6 could handle character strings and
byte arrays with the same constructs internally. But with utf8 and
especially with sv_utf8_upgrade() this is no longer true.
So, coming back to my original question: How am I to pass a BLOB
byte array into perl and do a _binary_ compare with a scalar? Is
there need for an additional operator?
//Stefan
> -----Original Message-----
> From: Gurusamy Sarathy [mailto:[EMAIL PROTECTED]]
> Sent: Wednesday, May 03, 2000 5:48 PM
> To: [EMAIL PROTECTED]
> Cc: [EMAIL PROTECTED]
> Subject: Re: binary compare of scalars
>
>
> On Wed, 03 May 2000 10:19:47 +0200, "Stefan Eissing" wrote:
> >Background: I have patched DBD::Oracle to recognize utf8 locale
> >and return utf8 scalars for Perl 5.6.0. It works. In one of the
> >standard tests however, a string with utf8 chars is inserted
> >into a BLOB, correctly read back again, but not eq to the original
> >string.
>
> Perl 5.6.0 has known bugs in Unicode support (which is why it is
> marked "experimental"). eq not knowing about SvUTF8 is one of them.
>
> The attached patch should help.
>
>
> Sarathy
> [EMAIL PROTECTED]
> -----------------------------------8<-----------------------------------
> Change 5921 by gsar@auger on 2000/04/24 06:58:26
>
> make eq unicode-aware (from Gisle Aas); fix bogus tests revealed
> by fix
>
> Affected files ...
>
> ... //depot/perl/sv.c#226 edit
> ... //depot/perl/t/lib/charnames.t#9 edit
> ... //depot/perl/t/pragma/utf8.t#6 edit
>
> Differences ...
>
> ==== //depot/perl/sv.c#226 (text) ====
> Index: perl/sv.c
> --- perl/sv.c.~1~ Wed May 3 08:41:47 2000
> +++ perl/sv.c Wed May 3 08:41:47 2000
> @@ -3920,10 +3920,19 @@
> else
> pv1 = SvPV(str1, cur1);
>
> - if (!str2)
> - return !cur1;
> - else
> - pv2 = SvPV(str2, cur2);
> + if (cur1) {
> + if (!str2)
> + return 0;
> + if (SvUTF8(str1) != SvUTF8(str2)) {
> + if (SvUTF8(str1)) {
> + sv_utf8_upgrade(str2);
> + }
> + else {
> + sv_utf8_upgrade(str1);
> + }
> + }
> + }
> + pv2 = SvPV(str2, cur2);
>
> if (cur1 != cur2)
> return 0;
>
> ==== //depot/perl/t/lib/charnames.t#9 (text) ====
> Index: perl/t/lib/charnames.t
> --- perl/t/lib/charnames.t.~1~ Wed May 3 08:41:47 2000
> +++ perl/t/lib/charnames.t Wed May 3 08:41:47 2000
> @@ -42,15 +42,21 @@
> $encoded_be = "\320\261";
> $encoded_alpha = "\316\261";
> $encoded_bet = "\327\221";
> +
> +sub to_bytes {
> + use bytes;
> + my $bytes = shift;
> +}
> +
> {
> use charnames ':full';
>
> - print "not " unless "\N{CYRILLIC SMALL LETTER BE}" eq $encoded_be;
> + print "not " unless to_bytes("\N{CYRILLIC SMALL LETTER BE}")
> eq $encoded_be;
> print "ok 4\n";
>
> use charnames qw(cyrillic greek :short);
>
> - print "not " unless "\N{be},\N{alpha},\N{hebrew:bet}"
> + print "not " unless to_bytes("\N{be},\N{alpha},\N{hebrew:bet}")
> eq "$encoded_be,$encoded_alpha,$encoded_bet";
> print "ok 5\n";
> }
>
> ==== //depot/perl/t/pragma/utf8.t#6 (xtext) ====
> Index: perl/t/pragma/utf8.t
> --- perl/t/pragma/utf8.t.~1~ Wed May 3 08:41:47 2000
> +++ perl/t/pragma/utf8.t Wed May 3 08:41:47 2000
> @@ -25,64 +25,64 @@
> $_ = ">\x{263A}<";
> s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg;
> ok $_, '>☺<';
> - $test++;
> + $test++; # 1
>
> $_ = ">\x{263A}<";
> my $rx = "\x{80}-\x{10ffff}";
> s/([$rx])/"&#".ord($1).";"/eg;
> ok $_, '>☺<';
> - $test++;
> + $test++; # 2
>
> $_ = ">\x{263A}<";
> my $rx = "\\x{80}-\\x{10ffff}";
> s/([$rx])/"&#".ord($1).";"/eg;
> ok $_, '>☺<';
> - $test++;
> + $test++; # 3
>
> $_ = "alpha,numeric";
> m/([[:alpha:]]+)/;
> ok $1, 'alpha';
> - $test++;
> + $test++; # 4
>
> $_ = "alphaNUMERICstring";
> m/([[:^lower:]]+)/;
> ok $1, 'NUMERIC';
> - $test++;
> + $test++; # 5
>
> $_ = "alphaNUMERICstring";
> m/(\p{Ll}+)/;
> ok $1, 'alpha';
> - $test++;
> + $test++; # 6
>
> $_ = "alphaNUMERICstring";
> m/(\p{Lu}+)/;
> ok $1, 'NUMERIC';
> - $test++;
> + $test++; # 7
>
> $_ = "alpha,numeric";
> m/([\p{IsAlpha}]+)/;
> ok $1, 'alpha';
> - $test++;
> + $test++; # 8
>
> $_ = "alphaNUMERICstring";
> m/([^\p{IsLower}]+)/;
> ok $1, 'NUMERIC';
> - $test++;
> + $test++; # 9
>
> $_ = "alpha123numeric456";
> m/([\p{IsDigit}]+)/;
> ok $1, '123';
> - $test++;
> + $test++; # 10
>
> $_ = "alpha123numeric456";
> m/([^\p{IsDigit}]+)/;
> ok $1, 'alpha';
> - $test++;
> + $test++; # 11
>
> $_ = ",123alpha,456numeric";
> m/([\p{IsAlnum}]+)/;
> ok $1, '123alpha';
> - $test++;
> + $test++; # 12
> }
> {
> use utf8;
> @@ -90,80 +90,88 @@
> $_ = "\x{263A}>\x{263A}\x{263A}";
>
> ok length, 4;
> - $test++;
> + $test++; # 13
>
> ok length((m/>(.)/)[0]), 1;
> - $test++;
> + $test++; # 14
>
> ok length($&), 2;
> - $test++;
> + $test++; # 15
>
> ok length($'), 1;
> - $test++;
> + $test++; # 16
>
> ok length($`), 1;
> - $test++;
> + $test++; # 17
>
> ok length($1), 1;
> - $test++;
> + $test++; # 18
>
> ok length($tmp=$&), 2;
> - $test++;
> + $test++; # 19
>
> ok length($tmp=$'), 1;
> - $test++;
> + $test++; # 20
>
> ok length($tmp=$`), 1;
> - $test++;
> + $test++; # 21
>
> ok length($tmp=$1), 1;
> - $test++;
> + $test++; # 22
> +
> + {
> + use bytes;
>
> - ok $&, pack("C*", ord(">"), 0342, 0230, 0272);
> - $test++;
> + my $tmp = $&;
> + ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
> + $test++; # 23
>
> - ok $', pack("C*", 0342, 0230, 0272);
> - $test++;
> + $tmp = $';
> + ok $tmp, pack("C*", 0342, 0230, 0272);
> + $test++; # 24
>
> - ok $`, pack("C*", 0342, 0230, 0272);
> - $test++;
> + $tmp = $`;
> + ok $tmp, pack("C*", 0342, 0230, 0272);
> + $test++; # 25
>
> - ok $1, pack("C*", 0342, 0230, 0272);
> - $test++;
> + $tmp = $1;
> + ok $tmp, pack("C*", 0342, 0230, 0272);
> + $test++; # 26
> + }
>
> {
> use bytes;
> no utf8;
>
> ok length, 10;
> - $test++;
> + $test++; # 27
>
> ok length((m/>(.)/)[0]), 1;
> - $test++;
> + $test++; # 28
>
> ok length($&), 2;
> - $test++;
> + $test++; # 29
>
> ok length($'), 5;
> - $test++;
> + $test++; # 30
>
> ok length($`), 3;
> - $test++;
> + $test++; # 31
>
> ok length($1), 1;
> - $test++;
> + $test++; # 32
>
> ok $&, pack("C*", ord(">"), 0342);
> - $test++;
> + $test++; # 33
>
> ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
> - $test++;
> + $test++; # 34
>
> ok $`, pack("C*", 0342, 0230, 0272);
> - $test++;
> + $test++; # 35
>
> ok $1, pack("C*", 0342);
> - $test++;
> + $test++; # 36
>
> }
>
> @@ -174,80 +182,87 @@
> }
>
> ok length, 10;
> - $test++;
> + $test++; # 37
>
> ok length((m/>(.)/)[0]), 1;
> - $test++;
> + $test++; # 38
>
> ok length($&), 2;
> - $test++;
> + $test++; # 39
>
> ok length($'), 1;
> - $test++;
> + $test++; # 40
>
> ok length($`), 1;
> - $test++;
> + $test++; # 41
>
> ok length($1), 1;
> - $test++;
> + $test++; # 42
>
> ok length($tmp=$&), 2;
> - $test++;
> + $test++; # 43
>
> ok length($tmp=$'), 1;
> - $test++;
> + $test++; # 44
>
> ok length($tmp=$`), 1;
> - $test++;
> + $test++; # 45
>
> ok length($tmp=$1), 1;
> - $test++;
> + $test++; # 46
>
> - ok $&, pack("C*", ord(">"), 0342, 0230, 0272);
> - $test++;
> + {
> + use bytes;
>
> - ok $', pack("C*", 0342, 0230, 0272);
> - $test++;
> + my $tmp = $&;
> + ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
> + $test++; # 47
>
> - ok $`, pack("C*", 0342, 0230, 0272);
> - $test++;
> + $tmp = $';
> + ok $tmp, pack("C*", 0342, 0230, 0272);
> + $test++; # 48
>
> - ok $1, pack("C*", 0342, 0230, 0272);
> - $test++;
> + $tmp = $`;
> + ok $tmp, pack("C*", 0342, 0230, 0272);
> + $test++; # 49
>
> + $tmp = $1;
> + ok $tmp, pack("C*", 0342, 0230, 0272);
> + $test++; # 50
> + }
> {
> use bytes;
> no utf8;
>
> ok length, 10;
> - $test++;
> + $test++; # 51
>
> ok length((m/>(.)/)[0]), 1;
> - $test++;
> + $test++; # 52
>
> ok length($&), 2;
> - $test++;
> + $test++; # 53
>
> ok length($'), 5;
> - $test++;
> + $test++; # 54
>
> ok length($`), 3;
> - $test++;
> + $test++; # 55
>
> ok length($1), 1;
> - $test++;
> + $test++; # 56
>
> ok $&, pack("C*", ord(">"), 0342);
> - $test++;
> + $test++; # 57
>
> ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
> - $test++;
> + $test++; # 58
>
> ok $`, pack("C*", 0342, 0230, 0272);
> - $test++;
> + $test++; # 59
>
> ok $1, pack("C*", 0342);
> - $test++;
> + $test++; # 60
>
> }
> }
> End of Patch.