Seems to be getting worse. I used the 5.6.0 stable.tar.gz to build
and applied the patch. The perl test cases still run, my DBD::Oracle
test are failier. 

Since I have some spare time at the moment, I'd like to track 
those things down. (I love Perl and I love having unicode
in there.) 

Browsing through sv.c, pp.c and friends, what is the design
philosophy for using SVf_UTF8? join upgrades to utf8, but should eq
do the same?

As far as I can see, Perl <5.6 could handle character strings and
byte arrays with the same constructs internally. But with utf8 and
especially with sv_utf8_upgrade() this is no longer true. 

So, coming back to my original question: How am I to pass a BLOB 
byte array into perl and do a _binary_ compare with a scalar? Is
there need for an additional operator?

//Stefan


> -----Original Message-----
> From: Gurusamy Sarathy [mailto:[EMAIL PROTECTED]]
> Sent: Wednesday, May 03, 2000 5:48 PM
> To: [EMAIL PROTECTED]
> Cc: [EMAIL PROTECTED]
> Subject: Re: binary compare of scalars 
> 
> 
> On Wed, 03 May 2000 10:19:47 +0200, "Stefan Eissing" wrote:
> >Background: I have patched DBD::Oracle to recognize utf8 locale
> >and return utf8 scalars for Perl 5.6.0. It works. In one of the
> >standard tests however, a string with utf8 chars is inserted
> >into a BLOB, correctly read back again, but not eq to the original
> >string. 
> 
> Perl 5.6.0 has known bugs in Unicode support (which is why it is
> marked "experimental").  eq not knowing about SvUTF8 is one of them.
> 
> The attached patch should help.
> 
> 
> Sarathy
> [EMAIL PROTECTED]
> -----------------------------------8<-----------------------------------
> Change 5921 by gsar@auger on 2000/04/24 06:58:26
> 
>       make eq unicode-aware (from Gisle Aas); fix bogus tests revealed
>       by fix
> 
> Affected files ...
> 
> ... //depot/perl/sv.c#226 edit
> ... //depot/perl/t/lib/charnames.t#9 edit
> ... //depot/perl/t/pragma/utf8.t#6 edit
> 
> Differences ...
> 
> ==== //depot/perl/sv.c#226 (text) ====
> Index: perl/sv.c
> --- perl/sv.c.~1~     Wed May  3 08:41:47 2000
> +++ perl/sv.c Wed May  3 08:41:47 2000
> @@ -3920,10 +3920,19 @@
>      else
>       pv1 = SvPV(str1, cur1);
>  
> -    if (!str2)
> -     return !cur1;
> -    else
> -     pv2 = SvPV(str2, cur2);
> +    if (cur1) {
> +     if (!str2)
> +         return 0;
> +     if (SvUTF8(str1) != SvUTF8(str2)) {
> +         if (SvUTF8(str1)) {
> +             sv_utf8_upgrade(str2);
> +         }
> +         else {
> +             sv_utf8_upgrade(str1);
> +         }
> +     }
> +    }
> +    pv2 = SvPV(str2, cur2);
>  
>      if (cur1 != cur2)
>       return 0;
> 
> ==== //depot/perl/t/lib/charnames.t#9 (text) ====
> Index: perl/t/lib/charnames.t
> --- perl/t/lib/charnames.t.~1~        Wed May  3 08:41:47 2000
> +++ perl/t/lib/charnames.t    Wed May  3 08:41:47 2000
> @@ -42,15 +42,21 @@
>  $encoded_be = "\320\261";
>  $encoded_alpha = "\316\261";
>  $encoded_bet = "\327\221";
> +
> +sub to_bytes {
> +    use bytes;
> +    my $bytes = shift;
> +}
> +
>  {
>    use charnames ':full';
>  
> -  print "not " unless "\N{CYRILLIC SMALL LETTER BE}" eq $encoded_be;
> +  print "not " unless to_bytes("\N{CYRILLIC SMALL LETTER BE}") 
> eq $encoded_be;
>    print "ok 4\n";
>  
>    use charnames qw(cyrillic greek :short);
>  
> -  print "not " unless "\N{be},\N{alpha},\N{hebrew:bet}" 
> +  print "not " unless to_bytes("\N{be},\N{alpha},\N{hebrew:bet}")
>      eq "$encoded_be,$encoded_alpha,$encoded_bet";
>    print "ok 5\n";
>  }
> 
> ==== //depot/perl/t/pragma/utf8.t#6 (xtext) ====
> Index: perl/t/pragma/utf8.t
> --- perl/t/pragma/utf8.t.~1~  Wed May  3 08:41:47 2000
> +++ perl/t/pragma/utf8.t      Wed May  3 08:41:47 2000
> @@ -25,64 +25,64 @@
>      $_ = ">\x{263A}<"; 
>      s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg; 
>      ok $_, '>&#9786;<';
> -    $test++;
> +    $test++;                         # 1
>  
>      $_ = ">\x{263A}<"; 
>      my $rx = "\x{80}-\x{10ffff}";
>      s/([$rx])/"&#".ord($1).";"/eg; 
>      ok $_, '>&#9786;<';
> -    $test++;
> +    $test++;                         # 2
>  
>      $_ = ">\x{263A}<"; 
>      my $rx = "\\x{80}-\\x{10ffff}";
>      s/([$rx])/"&#".ord($1).";"/eg; 
>      ok $_, '>&#9786;<';
> -    $test++;
> +    $test++;                         # 3
>  
>      $_ = "alpha,numeric"; 
>      m/([[:alpha:]]+)/; 
>      ok $1, 'alpha';
> -    $test++;
> +    $test++;                         # 4
>  
>      $_ = "alphaNUMERICstring";
>      m/([[:^lower:]]+)/; 
>      ok $1, 'NUMERIC';
> -    $test++;
> +    $test++;                         # 5
>  
>      $_ = "alphaNUMERICstring";
>      m/(\p{Ll}+)/; 
>      ok $1, 'alpha';
> -    $test++;
> +    $test++;                         # 6
>  
>      $_ = "alphaNUMERICstring"; 
>      m/(\p{Lu}+)/; 
>      ok $1, 'NUMERIC';
> -    $test++;
> +    $test++;                         # 7
>  
>      $_ = "alpha,numeric"; 
>      m/([\p{IsAlpha}]+)/; 
>      ok $1, 'alpha';
> -    $test++;
> +    $test++;                         # 8
>  
>      $_ = "alphaNUMERICstring";
>      m/([^\p{IsLower}]+)/; 
>      ok $1, 'NUMERIC';
> -    $test++;
> +    $test++;                         # 9
>  
>      $_ = "alpha123numeric456"; 
>      m/([\p{IsDigit}]+)/; 
>      ok $1, '123';
> -    $test++;
> +    $test++;                         # 10
>  
>      $_ = "alpha123numeric456"; 
>      m/([^\p{IsDigit}]+)/; 
>      ok $1, 'alpha';
> -    $test++;
> +    $test++;                         # 11
>  
>      $_ = ",123alpha,456numeric"; 
>      m/([\p{IsAlnum}]+)/; 
>      ok $1, '123alpha';
> -    $test++;
> +    $test++;                         # 12
>  }
>  {
>      use utf8;
> @@ -90,80 +90,88 @@
>      $_ = "\x{263A}>\x{263A}\x{263A}"; 
>  
>      ok length, 4;
> -    $test++;
> +    $test++;                         # 13
>  
>      ok length((m/>(.)/)[0]), 1;
> -    $test++;
> +    $test++;                         # 14
>  
>      ok length($&), 2;
> -    $test++;
> +    $test++;                         # 15
>  
>      ok length($'), 1;
> -    $test++;
> +    $test++;                         # 16
>  
>      ok length($`), 1;
> -    $test++;
> +    $test++;                         # 17
>  
>      ok length($1), 1;
> -    $test++;
> +    $test++;                         # 18
>  
>      ok length($tmp=$&), 2;
> -    $test++;
> +    $test++;                         # 19
>  
>      ok length($tmp=$'), 1;
> -    $test++;
> +    $test++;                         # 20
>  
>      ok length($tmp=$`), 1;
> -    $test++;
> +    $test++;                         # 21
>  
>      ok length($tmp=$1), 1;
> -    $test++;
> +    $test++;                         # 22
> +
> +    {
> +        use bytes;
>  
> -    ok $&, pack("C*", ord(">"), 0342, 0230, 0272);
> -    $test++;
> +        my $tmp = $&;
> +     ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
> +     $test++;                                # 23
>  
> -    ok $', pack("C*", 0342, 0230, 0272);
> -    $test++;
> +     $tmp = $';
> +     ok $tmp, pack("C*", 0342, 0230, 0272);
> +     $test++;                                # 24
>  
> -    ok $`, pack("C*", 0342, 0230, 0272);
> -    $test++;
> +     $tmp = $`;
> +     ok $tmp, pack("C*", 0342, 0230, 0272);
> +     $test++;                                # 25
>  
> -    ok $1, pack("C*", 0342, 0230, 0272);
> -    $test++;
> +     $tmp = $1;
> +     ok $tmp, pack("C*", 0342, 0230, 0272);
> +     $test++;                                # 26
> +    }
>  
>      {
>       use bytes;
>       no utf8;
>  
>       ok length, 10;
> -     $test++;
> +     $test++;                                # 27
>  
>       ok length((m/>(.)/)[0]), 1;
> -     $test++;
> +     $test++;                                # 28
>  
>       ok length($&), 2;
> -     $test++;
> +     $test++;                                # 29
>  
>       ok length($'), 5;
> -     $test++;
> +     $test++;                                # 30
>  
>       ok length($`), 3;
> -     $test++;
> +     $test++;                                # 31
>  
>       ok length($1), 1;
> -     $test++;
> +     $test++;                                # 32
>  
>       ok $&, pack("C*", ord(">"), 0342);
> -     $test++;
> +     $test++;                                # 33
>  
>       ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
> -     $test++;
> +     $test++;                                # 34
>  
>       ok $`, pack("C*", 0342, 0230, 0272);
> -     $test++;
> +     $test++;                                # 35
>  
>       ok $1, pack("C*", 0342);
> -     $test++;
> +     $test++;                                # 36
>  
>      }
>  
> @@ -174,80 +182,87 @@
>      }
>  
>      ok length, 10;
> -    $test++;
> +    $test++;                         # 37
>  
>      ok length((m/>(.)/)[0]), 1;
> -    $test++;
> +    $test++;                         # 38
>  
>      ok length($&), 2;
> -    $test++;
> +    $test++;                         # 39
>  
>      ok length($'), 1;
> -    $test++;
> +    $test++;                         # 40
>  
>      ok length($`), 1;
> -    $test++;
> +    $test++;                         # 41
>  
>      ok length($1), 1;
> -    $test++;
> +    $test++;                         # 42
>  
>      ok length($tmp=$&), 2;
> -    $test++;
> +    $test++;                         # 43
>  
>      ok length($tmp=$'), 1;
> -    $test++;
> +    $test++;                         # 44
>  
>      ok length($tmp=$`), 1;
> -    $test++;
> +    $test++;                         # 45
>  
>      ok length($tmp=$1), 1;
> -    $test++;
> +    $test++;                         # 46
>  
> -    ok $&, pack("C*", ord(">"), 0342, 0230, 0272);
> -    $test++;
> +    {
> +     use bytes;
>  
> -    ok $', pack("C*", 0342, 0230, 0272);
> -    $test++;
> +        my $tmp = $&;
> +     ok $tmp, pack("C*", ord(">"), 0342, 0230, 0272);
> +     $test++;                                # 47
>  
> -    ok $`, pack("C*", 0342, 0230, 0272);
> -    $test++;
> +        $tmp = $';
> +     ok $tmp, pack("C*", 0342, 0230, 0272);
> +     $test++;                                # 48
>  
> -    ok $1, pack("C*", 0342, 0230, 0272);
> -    $test++;
> +        $tmp = $`;
> +     ok $tmp, pack("C*", 0342, 0230, 0272);
> +     $test++;                                # 49
>  
> +        $tmp = $1;
> +     ok $tmp, pack("C*", 0342, 0230, 0272);
> +     $test++;                                # 50
> +    }
>      {
>       use bytes;
>       no utf8;
>  
>       ok length, 10;
> -     $test++;
> +     $test++;                                # 51
>  
>       ok length((m/>(.)/)[0]), 1;
> -     $test++;
> +     $test++;                                # 52
>  
>       ok length($&), 2;
> -     $test++;
> +     $test++;                                # 53
>  
>       ok length($'), 5;
> -     $test++;
> +     $test++;                                # 54
>  
>       ok length($`), 3;
> -     $test++;
> +     $test++;                                # 55
>  
>       ok length($1), 1;
> -     $test++;
> +     $test++;                                # 56
>  
>       ok $&, pack("C*", ord(">"), 0342);
> -     $test++;
> +     $test++;                                # 57
>  
>       ok $', pack("C*", 0230, 0272, 0342, 0230, 0272);
> -     $test++;
> +     $test++;                                # 58
>  
>       ok $`, pack("C*", 0342, 0230, 0272);
> -     $test++;
> +     $test++;                                # 59
>  
>       ok $1, pack("C*", 0342);
> -     $test++;
> +     $test++;                                # 60
>  
>      }
>  }
> End of Patch.

Reply via email to