In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/deaabfbb31a16ea25dbcf2eeeccec817bd469adf?hp=3fbaac97953bf3ca27149a4c9bd6c9893141d568>

- Log -----------------------------------------------------------------
commit deaabfbb31a16ea25dbcf2eeeccec817bd469adf
Author: Nicholas Clark <[email protected]>
Date:   Sat Mar 12 16:23:32 2011 +0000

    In utf8decode.t, use //x to add comments to the parsing regexp.
    
    Also, assign directly to variables, instead of going via $1 to $7.

M       t/op/utf8decode.t

commit 04f3c60895040abc3f378493da2af39f48e56714
Author: Nicholas Clark <[email protected]>
Date:   Sat Mar 12 16:02:48 2011 +0000

    In utf8decode.t, test that we get the expected Unicode character(s)
    
    Previously some (not all) of the "y"es cases detailed the expected code
    point.
    Add all those that were missing, and update the parsing regexp to cope with
    multiple Unicode characters.

M       t/op/utf8decode.t

commit f96a66c2d516de066230151e9a5f9b6fe603cebf
Author: Nicholas Clark <[email protected]>
Date:   Sat Mar 12 14:32:27 2011 +0000

    In utf8decode.t, constrain more tightly the testing of expected warnings.
    
    If one warning is expected, use warning_like() to test for it, which will
    fail if multiple warnings are generated. Where multiple warnings are
    generated, as well as testing that the first seen matches the expected
    warning, check that the expected number are seen. Mark as TODO 3.4.1,
    which Markus Kuhn annotates as "All the 10 sequences of 3.3 concatenated,
    you should see 10 malformed sequences being signalled", because currently
    perl generates 18 warnings.

M       t/op/utf8decode.t

commit c03ceae63f8f7a143e71b8f14156e70222a3c47b
Author: Nicholas Clark <[email protected]>
Date:   Sat Mar 12 12:26:06 2011 +0000

    In utf8decode.t, use warning_is() for the should-not-warn cases.
    
    Move the localised $SIG{__WARN__} handler into the block for the should-warn
    case, and avoid using $@ as the warnings accumulator. As an expected
    warning is always provided, eliminate the code for dealing with an
    unspecified expected warning. The re-ordering allows $id to be a lexical
    with the same scope as all others derived from the test table lines.

M       t/op/utf8decode.t

commit e1a6746056fd4573691b7765b9fd94a7996d1320
Author: Nicholas Clark <[email protected]>
Date:   Sat Mar 12 12:15:20 2011 +0000

    In utf8decode.t, move the test data from a heredoc to <DATA>
    
    As the test data is actually somewhat larger than the test code, git's diff
    shows this as moving the code upwards :-)
    Hence take advantage of the already-churning lines to remove the outermost
    block and reindent.

M       t/op/utf8decode.t

commit bb6a3342cf501f20dff5113848ea5481c3457b16
Author: Nicholas Clark <[email protected]>
Date:   Sat Mar 12 12:09:00 2011 +0000

    In utf8decode.t, remove the \x sequence strings of bytes
    
    For each test case, the information is duplicated in the hex sequences, and
    they don't rely on the "" interpolation of the heredoc that initialises the
    array @MK

M       t/op/utf8decode.t

commit e90499c4d9f1d0749636c18f943be9718d8ba6ea
Author: Nicholas Clark <[email protected]>
Date:   Sat Mar 12 11:45:52 2011 +0000

    In utf8decode.t, test that the hex sequences and \x escapes are equivalent.
    
    The hex sequences had been in the test data since they were first added in
    ba210ebec161cde0, but have never actually been used, other than for a length
    cross-check.

M       t/op/utf8decode.t

commit 680218c4ad0e98dcb8cc8aef96505a078b49404f
Author: Nicholas Clark <[email protected]>
Date:   Sat Mar 12 11:41:19 2011 +0000

    Convert utf8decode.t to test.pl

M       t/op/utf8decode.t
-----------------------------------------------------------------------

Summary of changes:
 t/op/utf8decode.t |  286 +++++++++++++++++++++++++---------------------------
 1 files changed, 138 insertions(+), 148 deletions(-)

diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t
index 7befae2..ba785fa 100644
--- a/t/op/utf8decode.t
+++ b/t/op/utf8decode.t
@@ -3,6 +3,7 @@
 BEGIN {
     chdir 't' if -d 't';
     @INC = '../lib';
+    require './test.pl';
 }
 
 {
@@ -10,184 +11,173 @@ BEGIN {
     use bytes;
     my $ordwide = ord($wide);
     printf "# under use bytes ord(v256) = 0x%02x\n", $ordwide;
-    if ($ordwide == 140) {
-       print "1..0 # Skip: UTF-EBCDIC (not UTF-8) used here\n";
-       exit 0;
-    }
-    elsif ($ordwide != 196) {
+    skip_all('UTF-EBCDIC (not UTF-8) used here') if $ordwide == 140;
+
+    if ($ordwide != 196) {
        printf "# v256 starts with 0x%02x\n", $ordwide;
     }
 }
 
 no utf8;
 
-print "1..78\n";
+foreach (<DATA>) {
+    if (/^(?:\d+(?:\.\d+)?)\s/ || /^#/) {
+       # print "# $_\n";
+    } elsif (my ($id, $okay, $Unicode, $byteslen, $hex, $charslen, $experr)
+            = /^(\d+\.\d+\.\d+[bu]?)   # ID
+               \s+(y|n|N-?\d+)         # expect to pass or fail
+                \s+([0-9a-f]{1,8}(?:,[0-9a-f]{1,8})*|-) # Unicode characters
+                \s+(\d+)                # number of octets
+                \s+([0-9a-f]{2}(?::[0-9a-f]{2})*)       # octets in hex
+                \s+(\d+|-)              # number of characters
+                (?:\s+(.+))?            # expected error (or comment)
+                $/x) {
+       my @hex = split(/:/, $hex);
+       is(scalar @hex, $byteslen, 'Amount of hex tallies with byteslen');
+       my $octets = join '', map {chr hex $_} @hex;
+       is(length $octets, $byteslen, 'Number of octets tallies with byteslen');
+       if ($okay eq 'y') {
+           my @chars = map {hex $_} split ',', $Unicode;
+           is(scalar @chars, $charslen, 'Amount of hex tallies with charslen');
+           my @got;
+           warning_is(sub {@got = unpack 'C0U*', $octets}, undef,
+                      "No warnings expected for $id");
+           is("@got", "@chars", 'Got expected Unicode characters');
+       } elsif ($okay eq 'n') {
+           isnt($experr, '', "Expected warning for $id provided");
+           warnings_like(sub {unpack 'C0U*', $octets}, [qr/$experr/],
+                        "Only expected warning for $id");
+       } elsif ($okay !~ /^N(-?\d+)/) {
+           is($okay, 'n', "Confused test description for $id");
+       } else {
+           my $expect = $1;
+           my @warnings;
 
-my $test = 1;
+           {
+               local $SIG{__WARN__} = sub {
+                   print "# $id: @_";
+                   push @warnings, "@_";
+               };
+               unpack 'C0U*', $octets;
+           }
+
+           isnt($experr, '', "Expected first warning for $id provided");
+           like($warnings[0], qr/$experr/, "Expected first warning for $id 
seen");
+           local $::TODO;
+           if ($expect < 0) {
+               $expect = -$expect;
+               $::TODO = "Markus Kuhn states that $expect invalid sequences 
should be signalled";
+           }
+           is(scalar @warnings, $expect, "Expected number of warnings for $id 
seen");
+       }
+    } else {
+       fail("unknown format '$_'");
+    }
+}
+
+done_testing();
 
 # This table is based on Markus Kuhn's UTF-8 Decode Stress Tester,
 # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt,
 # version dated 2000-09-02.
 
-# We use the \x notation instead of raw binary bytes for \x00-\x1f\x7f-\xff
-# because e.g. many patch programs have issues with binary data.
-
-my @MK = split(/\n/, <<__EOMK__);
+__DATA__
 1      Correct UTF-8
-1.1.1 y "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5" -               11      
ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5        5
+1.1.1 y 3ba,1f79,3c3,3bc,3b5   11      ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5        
5
 2      Boundary conditions
 2.1    First possible sequence of certain length
-2.1.1 y "\x00"                 0               1       00      1
-2.1.2 y "\xc2\x80"                     80              2       c2:80   1
-2.1.3 y "\xe0\xa0\x80"         800             3       e0:a0:80        1
-2.1.4 y "\xf0\x90\x80\x80"             10000           4       f0:90:80:80     
1
-2.1.5 y "\xf8\x88\x80\x80\x80" 200000          5       f8:88:80:80:80  1
-2.1.6 y "\xfc\x84\x80\x80\x80\x80"     4000000         6       
fc:84:80:80:80:80       1
+2.1.1 y 0              1       00      1
+2.1.2 y 80             2       c2:80   1
+2.1.3 y 800            3       e0:a0:80        1
+2.1.4 y 10000          4       f0:90:80:80     1
+2.1.5 y 200000         5       f8:88:80:80:80  1
+2.1.6 y 4000000                6       fc:84:80:80:80:80       1
 2.2    Last possible sequence of certain length
-2.2.1 y "\x7f"                 7f              1       7f      1
-2.2.2 y "\xdf\xbf"                     7ff             2       df:bf   1
+2.2.1 y 7f             1       7f      1
+2.2.2 y 7ff            2       df:bf   1
 # The ffff is legal by default since 872c91ae155f6880
-2.2.3 y "\xef\xbf\xbf"                 ffff            3       ef:bf:bf        
1       character 0xffff
-2.2.4 y "\xf7\xbf\xbf\xbf"                     1fffff          4       
f7:bf:bf:bf     1
-2.2.5 y "\xfb\xbf\xbf\xbf\xbf"                 3ffffff         5       
fb:bf:bf:bf:bf  1
-2.2.6 y "\xfd\xbf\xbf\xbf\xbf\xbf"             7fffffff        6       
fd:bf:bf:bf:bf:bf       1
+2.2.3 y ffff           3       ef:bf:bf        1       character 0xffff
+2.2.4 y 1fffff         4       f7:bf:bf:bf     1
+2.2.5 y 3ffffff                5       fb:bf:bf:bf:bf  1
+2.2.6 y 7fffffff       6       fd:bf:bf:bf:bf:bf       1
 2.3    Other boundary conditions
-2.3.1 y "\xed\x9f\xbf"         d7ff            3       ed:9f:bf        1
-2.3.2 y "\xee\x80\x80"         e000            3       ee:80:80        1
-2.3.3 y "\xef\xbf\xbd"                 fffd            3       ef:bf:bd        
1
-2.3.4 y "\xf4\x8f\xbf\xbf"             10ffff          4       f4:8f:bf:bf     
1
-2.3.5 y "\xf4\x90\x80\x80"             110000          4       f4:90:80:80     
1
+2.3.1 y d7ff           3       ed:9f:bf        1
+2.3.2 y e000           3       ee:80:80        1
+2.3.3 y fffd           3       ef:bf:bd        1
+2.3.4 y 10ffff         4       f4:8f:bf:bf     1
+2.3.5 y 110000         4       f4:90:80:80     1
 3      Malformed sequences
 3.1    Unexpected continuation bytes
-3.1.1 n "\x80"                 -               1       80      -       
unexpected continuation byte 0x80
-3.1.2 n "\xbf"                 -               1       bf      -       
unexpected continuation byte 0xbf
-3.1.3 n "\x80\xbf"                     -               2       80:bf   -       
unexpected continuation byte 0x80
-3.1.4 n "\x80\xbf\x80"         -               3       80:bf:80        -       
unexpected continuation byte 0x80
-3.1.5 n "\x80\xbf\x80\xbf"             -               4       80:bf:80:bf     
-       unexpected continuation byte 0x80
-3.1.6 n "\x80\xbf\x80\xbf\x80" -               5       80:bf:80:bf:80  -       
unexpected continuation byte 0x80
-3.1.7 n "\x80\xbf\x80\xbf\x80\xbf"     -               6       
80:bf:80:bf:80:bf       -       unexpected continuation byte 0x80
-3.1.8 n "\x80\xbf\x80\xbf\x80\xbf\x80" -               7       
80:bf:80:bf:80:bf:80    -       unexpected continuation byte 0x80
-3.1.9 n 
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\x
 ... [303 chars truncated]
+3.1.1 n -              1       80      -       unexpected continuation byte 
0x80
+3.1.2 n -              1       bf      -       unexpected continuation byte 
0xbf
+3.1.3 N2 -             2       80:bf   -       unexpected continuation byte 
0x80
+3.1.4 N3 -             3       80:bf:80        -       unexpected continuation 
byte 0x80
+3.1.5 N4 -             4       80:bf:80:bf     -       unexpected continuation 
byte 0x80
+3.1.6 N5 -             5       80:bf:80:bf:80  -       unexpected continuation 
byte 0x80
+3.1.7 N6 -             6       80:bf:80:bf:80:bf       -       unexpected 
continuation byte 0x80
+3.1.8 N7 -             7       80:bf:80:bf:80:bf:80    -       unexpected 
continuation byte 0x80
+3.1.9 N64 -    64      
80:81:82:83:84:85:86:87:88:89:8a:8b:8c:8d:8e:8f:90:91:92:93:94:95:96:97:98:99:9a:9b:9c:9d:9e:9f:a0:a1:a2:a3:a4:a5:a6:a7:a8:a9:aa:ab:ac:ad:ae:af:b0:b1:b2:b3:b4:b5:b6:b7:b8:b9:ba:bb:bc:b
 ... [43 chars truncated]
 3.2    Lonely start characters
-3.2.1 n "\xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd 
\xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd 
\xde \xdf "     -       64      c0:20:c1:20:c2:20:c3:2 ... [244 chars truncated]
-3.2.2 n "\xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed 
\xee \xef "     -       32      
e0:20:e1:20:e2:20:e3:20:e4:20:e5:20:e6:20:e7:20:e8:20:e9:20:ea:20:eb:20:ec:20:ed:20:ee:20:ef:20
 -       unexp ... [67 chars truncated]
-3.2.3 n "\xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 "     -       16      
f0:20:f1:20:f2:20:f3:20:f4:20:f5:20:f6:20:f7:20 -       unexpected 
non-continuation byte 0x20, immediately after start byte 0xf0
-3.2.4 n "\xf8 \xf9 \xfa \xfb "         -       8       f8:20:f9:20:fa:20:fb:20 
-       unexpected non-continuation byte 0x20, immediately after start byte 0xf8
-3.2.5 n "\xfc \xfd "                   -       4       fc:20:fd:20     -       
unexpected non-continuation byte 0x20, immediately after start byte 0xfc
+3.2.1 N32 -    64      
c0:20:c1:20:c2:20:c3:20:c4:20:c5:20:c6:20:c7:20:c8:20:c9:20:ca:20:cb:20:cc:20:cd:20:ce:20:cf:20:d0:20:d1:20:d2:20:d3:20:d4:20:d5:20:d6:20:d7:20:d8:20:d9:20:da:20:db:20:dc:20:dd:20:de:
 ... [83 chars truncated]
+3.2.2 N16 -    32      
e0:20:e1:20:e2:20:e3:20:e4:20:e5:20:e6:20:e7:20:e8:20:e9:20:ea:20:eb:20:ec:20:ed:20:ee:20:ef:20
 -       unexpected non-continuation byte 0x20, immediately after start byte 
0xe0
+3.2.3 N8 -     16      f0:20:f1:20:f2:20:f3:20:f4:20:f5:20:f6:20:f7:20 -       
unexpected non-continuation byte 0x20, immediately after start byte 0xf0
+3.2.4 N4 -     8       f8:20:f9:20:fa:20:fb:20 -       unexpected 
non-continuation byte 0x20, immediately after start byte 0xf8
+3.2.5 N2 -     4       fc:20:fd:20     -       unexpected non-continuation 
byte 0x20, immediately after start byte 0xfc
 3.3    Sequences with last continuation byte missing
-3.3.1 n "\xc0"                 -       1       c0      -       1 byte, need 2
-3.3.2 n "\xe0\x80"                     -       2       e0:80   -       2 
bytes, need 3
-3.3.3 n "\xf0\x80\x80"         -       3       f0:80:80        -       3 
bytes, need 4
-3.3.4 n "\xf8\x80\x80\x80"             -       4       f8:80:80:80     -       
4 bytes, need 5
-3.3.5 n "\xfc\x80\x80\x80\x80" -       5       fc:80:80:80:80  -       5 
bytes, need 6
-3.3.6 n "\xdf"                 -       1       df      -       1 byte, need 2
-3.3.7 n "\xef\xbf"                     -       2       ef:bf   -       2 
bytes, need 3
-3.3.8 n "\xf7\xbf\xbf"                 -       3       f7:bf:bf        -       
3 bytes, need 4
-3.3.9 n "\xfb\xbf\xbf\xbf"                     -       4       fb:bf:bf:bf     
-       4 bytes, need 5
-3.3.10 n "\xfd\xbf\xbf\xbf\xbf"                -       5       fd:bf:bf:bf:bf  
-       5 bytes, need 6
+3.3.1 n -      1       c0      -       1 byte, need 2
+3.3.2 n -      2       e0:80   -       2 bytes, need 3
+3.3.3 n -      3       f0:80:80        -       3 bytes, need 4
+3.3.4 n -      4       f8:80:80:80     -       4 bytes, need 5
+3.3.5 n -      5       fc:80:80:80:80  -       5 bytes, need 6
+3.3.6 n -      1       df      -       1 byte, need 2
+3.3.7 n -      2       ef:bf   -       2 bytes, need 3
+3.3.8 n -      3       f7:bf:bf        -       3 bytes, need 4
+3.3.9 n -      4       fb:bf:bf:bf     -       4 bytes, need 5
+3.3.10 n -     5       fd:bf:bf:bf:bf  -       5 bytes, need 6
 3.4    Concatenation of incomplete sequences
-3.4.1 n 
"\xc0\xe0\x80\xf0\x80\x80\xf8\x80\x80\x80\xfc\x80\x80\x80\x80\xdf\xef\xbf\xf7\xbf\xbf\xfb\xbf\xbf\xbf\xfd\xbf\xbf\xbf\xbf"
     -       30      
c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf: ... [101 chars 
truncated]
+3.4.1 N-10 -   30      
c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf
       -       unexpected non-continuation byte 0xe0, immediately after start 
byte 0xc0
 3.5    Impossible bytes
-3.5.1 n "\xfe"                 -       1       fe      -       byte 0xfe
-3.5.2 n "\xff"                 -       1       ff      -       byte 0xff
-3.5.3 n "\xfe\xfe\xff\xff"                     -       4       fe:fe:ff:ff     
-       byte 0xfe
+3.5.1 n -      1       fe      -       byte 0xfe
+3.5.2 n -      1       ff      -       byte 0xff
+3.5.3 N4 -     4       fe:fe:ff:ff     -       byte 0xfe
 4      Overlong sequences
 4.1    Examples of an overlong ASCII character
-4.1.1 n "\xc0\xaf"                     -       2       c0:af   -       2 
bytes, need 1
-4.1.2 n "\xe0\x80\xaf"         -       3       e0:80:af        -       3 
bytes, need 1
-4.1.3 n "\xf0\x80\x80\xaf"             -       4       f0:80:80:af     -       
4 bytes, need 1
-4.1.4 n "\xf8\x80\x80\x80\xaf" -       5       f8:80:80:80:af  -       5 
bytes, need 1
-4.1.5 n "\xfc\x80\x80\x80\x80\xaf"     -       6       fc:80:80:80:80:af       
-       6 bytes, need 1
+4.1.1 n -      2       c0:af   -       2 bytes, need 1
+4.1.2 n -      3       e0:80:af        -       3 bytes, need 1
+4.1.3 n -      4       f0:80:80:af     -       4 bytes, need 1
+4.1.4 n -      5       f8:80:80:80:af  -       5 bytes, need 1
+4.1.5 n -      6       fc:80:80:80:80:af       -       6 bytes, need 1
 4.2    Maximum overlong sequences
-4.2.1 n "\xc1\xbf"                     -       2       c1:bf   -       2 
bytes, need 1
-4.2.2 n "\xe0\x9f\xbf"         -       3       e0:9f:bf        -       3 
bytes, need 2
-4.2.3 n "\xf0\x8f\xbf\xbf"             -       4       f0:8f:bf:bf     -       
4 bytes, need 3
-4.2.4 n "\xf8\x87\xbf\xbf\xbf"         -       5       f8:87:bf:bf:bf  -       
5 bytes, need 4
-4.2.5 n "\xfc\x83\xbf\xbf\xbf\xbf"             -       6       
fc:83:bf:bf:bf:bf       -       6 bytes, need 5
+4.2.1 n -      2       c1:bf   -       2 bytes, need 1
+4.2.2 n -      3       e0:9f:bf        -       3 bytes, need 2
+4.2.3 n -      4       f0:8f:bf:bf     -       4 bytes, need 3
+4.2.4 n -      5       f8:87:bf:bf:bf  -       5 bytes, need 4
+4.2.5 n -      6       fc:83:bf:bf:bf:bf       -       6 bytes, need 5
 4.3    Overlong representation of the NUL character
-4.3.1 n "\xc0\x80"                     -       2       c0:80   -       2 
bytes, need 1
-4.3.2 n "\xe0\x80\x80"         -       3       e0:80:80        -       3 
bytes, need 1
-4.3.3 n "\xf0\x80\x80\x80"             -       4       f0:80:80:80     -       
4 bytes, need 1
-4.3.4 n "\xf8\x80\x80\x80\x80" -       5       f8:80:80:80:80  -       5 
bytes, need 1
-4.3.5 n "\xfc\x80\x80\x80\x80\x80"     -       6       fc:80:80:80:80:80       
-       6 bytes, need 1
+4.3.1 n -      2       c0:80   -       2 bytes, need 1
+4.3.2 n -      3       e0:80:80        -       3 bytes, need 1
+4.3.3 n -      4       f0:80:80:80     -       4 bytes, need 1
+4.3.4 n -      5       f8:80:80:80:80  -       5 bytes, need 1
+4.3.5 n -      6       fc:80:80:80:80:80       -       6 bytes, need 1
 5      Illegal code positions
 5.1    Single UTF-16 surrogates
-5.1.1 y "\xed\xa0\x80"         -       3       ed:a0:80        -       UTF-16 
surrogate 0xd800
-5.1.2 y "\xed\xad\xbf"                 -       3       ed:ad:bf        -       
UTF-16 surrogate 0xdb7f
-5.1.3 y "\xed\xae\x80"         -       3       ed:ae:80        -       UTF-16 
surrogate 0xdb80
-5.1.4 y "\xed\xaf\xbf"                 -       3       ed:af:bf        -       
UTF-16 surrogate 0xdbff
-5.1.5 y "\xed\xb0\x80"         -       3       ed:b0:80        -       UTF-16 
surrogate 0xdc00
-5.1.6 y "\xed\xbe\x80"         -       3       ed:be:80        -       UTF-16 
surrogate 0xdf80
-5.1.7 y "\xed\xbf\xbf"                 -       3       ed:bf:bf        -       
UTF-16 surrogate 0xdfff
+5.1.1 y d800   3       ed:a0:80        1       UTF-16 surrogate 0xd800
+5.1.2 y db7f   3       ed:ad:bf        1       UTF-16 surrogate 0xdb7f
+5.1.3 y db80   3       ed:ae:80        1       UTF-16 surrogate 0xdb80
+5.1.4 y dbff   3       ed:af:bf        1       UTF-16 surrogate 0xdbff
+5.1.5 y dc00   3       ed:b0:80        1       UTF-16 surrogate 0xdc00
+5.1.6 y df80   3       ed:be:80        1       UTF-16 surrogate 0xdf80
+5.1.7 y dfff   3       ed:bf:bf        1       UTF-16 surrogate 0xdfff
 5.2    Paired UTF-16 surrogates
-5.2.1 y "\xed\xa0\x80\xed\xb0\x80"             -       6       
ed:a0:80:ed:b0:80       -       UTF-16 surrogate 0xd800
-5.2.2 y "\xed\xa0\x80\xed\xbf\xbf"             -       6       
ed:a0:80:ed:bf:bf       -       UTF-16 surrogate 0xd800
-5.2.3 y "\xed\xad\xbf\xed\xb0\x80"             -       6       
ed:ad:bf:ed:b0:80       -       UTF-16 surrogate 0xdb7f
-5.2.4 y "\xed\xad\xbf\xed\xbf\xbf"             -       6       
ed:ad:bf:ed:bf:bf       -       UTF-16 surrogate 0xdb7f
-5.2.5 y "\xed\xae\x80\xed\xb0\x80"             -       6       
ed:ae:80:ed:b0:80       -       UTF-16 surrogate 0xdb80
-5.2.6 y "\xed\xae\x80\xed\xbf\xbf"             -       6       
ed:ae:80:ed:bf:bf       -       UTF-16 surrogate 0xdb80
-5.2.7 y "\xed\xaf\xbf\xed\xb0\x80"             -       6       
ed:af:bf:ed:b0:80       -       UTF-16 surrogate 0xdbff
-5.2.8 y "\xed\xaf\xbf\xed\xbf\xbf"             -       6       
ed:af:bf:ed:bf:bf       -       UTF-16 surrogate 0xdbff
+5.2.1 y d800,dc00      6       ed:a0:80:ed:b0:80       2       UTF-16 
surrogates 0xd800, dc00
+5.2.2 y d800,dfff      6       ed:a0:80:ed:bf:bf       2       UTF-16 
surrogates 0xd800, dfff
+5.2.3 y db7f,dc00      6       ed:ad:bf:ed:b0:80       2       UTF-16 
surrogates 0xdb7f, dc00
+5.2.4 y db7f,dfff      6       ed:ad:bf:ed:bf:bf       2       UTF-16 
surrogates 0xdb7f, dfff
+5.2.5 y db80,dc00      6       ed:ae:80:ed:b0:80       2       UTF-16 
surrogates 0xdb80, dc00
+5.2.6 y db80,dfff      6       ed:ae:80:ed:bf:bf       2       UTF-16 
surrogates 0xdb80, dfff
+5.2.7 y dbff,dc00      6       ed:af:bf:ed:b0:80       2       UTF-16 
surrogates 0xdbff, dc00
+5.2.8 y dbff,dfff      6       ed:af:bf:ed:bf:bf       2       UTF-16 
surrogates 0xdbff, dfff
 5.3    Other illegal code positions
-5.3.1 y "\xef\xbf\xbe"                 -       3       ef:bf:be        -       
byte order mark 0xfffe
+5.3.1 y fffe   3       ef:bf:be        1       byte order mark 0xfffe
 # The ffff is legal by default since 872c91ae155f6880
-5.3.2 y "\xef\xbf\xbf"                 -       3       ef:bf:bf        -       
character 0xffff
-__EOMK__
-
-# 104..181
-{
-    my $id;
-
-    local $SIG{__WARN__} = sub {
-       print "# $id: @_";
-       $@ .= "@_";
-    };
-
-    sub moan {
-       print "$id: @_";
-    }
-
-    sub warn_unpack_U {
-       $@ = '';
-       my @null = unpack('C0U*', $_[0]);
-       return $@;
-    }
-
-    for (@MK) {
-       if (/^(?:\d+(?:\.\d+)?)\s/ || /^#/) {
-           # print "# $_\n";
-       } elsif 
(/^(\d+\.\d+\.\d+[bu]?)\s+([yn])\s+"(.+)"\s+([0-9a-f]{1,8}|-)\s+(\d+)\s+([0-9a-f]{2}(?::[0-9a-f]{2})*)(?:\s+((?:\d+|-)(?:\s+(.+))?))?$/)
 {
-           $id = $1;
-           my ($okay, $bytes, $Unicode, $byteslen, $hex, $charslen, $experr) =
-               ($2, $3, $4, $5, $6, $7, $8);
-           my @hex = split(/:/, $hex);
-           unless (@hex == $byteslen) {
-               my $nhex = @hex;
-               moan "amount of hex ($nhex) not equal to byteslen 
($byteslen)\n";
-           }
-           {
-               use bytes;
-               my $bytesbyteslen = length($bytes);
-               unless ($bytesbyteslen == $byteslen) {
-                   moan "bytes length() ($bytesbyteslen) not equal to 
$byteslen\n";
-               }
-           }
-           my $warn = warn_unpack_U($bytes);
-           if ($okay eq 'y') {
-               if ($warn) {
-                   moan "unpack('C0U*') false negative\n";
-                   print "not ";
-               }
-           } elsif ($okay eq 'n') {
-               if (!$warn || ($experr ne '' && $warn !~ /$experr/)) {
-                   moan "unpack('C0U*') false positive\n";
-                   print "not ";
-               }
-           }
-           print "ok $test # $id $okay\n";
-           $test++;
-       } else {
-           moan "unknown format\n";
-       }
-    }
-}
+5.3.2 y ffff   3       ef:bf:bf        1       character 0xffff

--
Perl5 Master Repository

Reply via email to