[perl.git] branch blead, updated. v5.25.5-56-g867a901

Karl Williamson Thu, 29 Sep 2016 20:33:03 -0700

In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/867a901b7ec194e3895eb595338d0d0ea4fc783f?hp=5654fe3cb7747930ab73ec2386527c2d0007459f>


- Log -----------------------------------------------------------------
commit 867a901b7ec194e3895eb595338d0d0ea4fc783f
Author: Karl Williamson <[email protected]>
Date:   Thu Sep 29 21:07:19 2016 -0600

    APItest/t/utf8.t: Add some constants
    
    Though unused now, they were convenient during some debugging, and may
    be again.

M       ext/XS-APItest/t/utf8.t

commit 57b7eb534d50ce7d2c2ce15e7d1783578c55be79
Author: Karl Williamson <[email protected]>
Date:   Thu Sep 29 21:05:49 2016 -0600

    APItest/t/utf8.t: Fix 3-byte overlong test
    
    Unlike ASCII platforms, one can tell on EBCDIC ones if an otherwise
    valid 3-byte sequence is overlong on the first byte.

M       ext/XS-APItest/t/utf8.t

commit 418080dc73a4b9e525a76d6d3b5034ff616716b4
Author: Karl Williamson <[email protected]>
Date:   Thu Sep 29 21:03:30 2016 -0600

    APItest/t/utf8.t: Fix EBCDIC test
    
    Unlike on ASCII platforms, it may take more than one byte of a partial
    character to determine if it represents a code point that needs 32 or
    more bits to represent.  This fixes the test to account for that.

M       ext/XS-APItest/t/utf8.t

commit f880f78a9370d267502630934d0d3033b34389a1
Author: Karl Williamson <[email protected]>
Date:   Thu Sep 29 21:01:36 2016 -0600

    utf8.c: Add missing type specifier to declaration
    
    This code was missing a STRLEN specifier; only compiled on EBCDIC.

M       utf8.c

commit c0d8738ea8562747c49e0e96160c92ba55a7873e
Author: Karl Williamson <[email protected]>
Date:   Thu Sep 29 11:51:41 2016 -0600

    APItest/t/utf8.t: Skip some tests if major one fails
    
    If the patched test fails, the subsequent ones in the loop are
    meaningless, so don't execute them.

M       ext/XS-APItest/t/utf8.t

commit a9e5eeaac5003e6ffbb826b05a079a9b3c1cd563
Author: Karl Williamson <[email protected]>
Date:   Thu Sep 29 11:50:51 2016 -0600

    APItest/t/utf8.t: Fix typo
    
    This was a typo in the UTF-EBCDIC for a code point, so affected only
    tests on that platform

M       ext/XS-APItest/t/utf8.t

commit 7ae54b83ee5a2592c85f68253d73dcfc43161de2
Author: Karl Williamson <[email protected]>
Date:   Wed Sep 28 20:42:30 2016 -0600

    utf8n_to_uvchr() Fix EBCDIC bug with overlongs
    
    The comment removed in this commit was wrong, and so was the code it
    described.  On EBCDIC platforms, there are malformations that need to be
    converted from Unicode to native.  When I wrote that I wasn't thinking
    about overlongs, which can evaluate to any code point.  The new tests in
    d566bd20c27a46aecd668d2f739b9515f46ac74f caught this.

M       utf8.c
-----------------------------------------------------------------------

Summary of changes:
 ext/XS-APItest/t/utf8.t | 34 +++++++++++++++++++++++++++++++---
 utf8.c                  |  6 ++----
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t
index fd3c903..2061232 100644
--- a/ext/XS-APItest/t/utf8.t
+++ b/ext/XS-APItest/t/utf8.t
@@ -46,10 +46,19 @@ my @i8_to_native = (    # Only code page 1047 so far.
 
0xDC,0xDD,0xDE,0xDF,0xE1,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,0xFA,0xFB,0xFC,0xFD,0xFE,
 );
 
+my @native_to_i8;
+for (my $i = 0; $i < 256; $i++) {
+    $native_to_i8[$i8_to_native[$i]] = $i;
+}
+
 *I8_to_native = (isASCII)
                     ? sub { return shift }
                     : sub { return join "", map { chr $i8_to_native[ord $_] }
                                             split "", shift };
+*native_to_I8 = (isASCII)
+                    ? sub { return shift }
+                    : sub { return join "", map { chr $native_to_i8[ord $_] }
+                                            split "", shift };
 
 my $is64bit = length sprintf("%x", ~0) > 8;
 
@@ -74,6 +83,10 @@ my $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE
                              = $UTF8_DISALLOW_SUPER|$UTF8_DISALLOW_SURROGATE;
 my $UTF8_DISALLOW_ILLEGAL_INTERCHANGE
               = $UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE|$UTF8_DISALLOW_NONCHAR;
+my $UTF8_WARN_ILLEGAL_C9_INTERCHANGE
+                             = $UTF8_WARN_SUPER|$UTF8_WARN_SURROGATE;
+my $UTF8_WARN_ILLEGAL_INTERCHANGE
+              = $UTF8_WARN_ILLEGAL_C9_INTERCHANGE|$UTF8_WARN_NONCHAR;
 
 # Test uvchr_to_utf8().
 my $UNICODE_WARN_SURROGATE        = 0x0001;
@@ -189,7 +202,7 @@ my %code_points = (
     0xFDEB     => (isASCII) ? "\xef\xb7\xab" : 
I8_to_native("\xf1\xbf\xaf\xab"),
     0xFDEC     => (isASCII) ? "\xef\xb7\xac" : 
I8_to_native("\xf1\xbf\xaf\xac"),
     0xFDED     => (isASCII) ? "\xef\xb7\xad" : 
I8_to_native("\xf1\xbf\xaf\xad"),
-    0xFDEE     => (isASCII) ? "\xef\xb7\xae" : 
I8_to_native("\xf1\xbf\xae\xae"),
+    0xFDEE     => (isASCII) ? "\xef\xb7\xae" : 
I8_to_native("\xf1\xbf\xaf\xae"),
     0xFDEF     => (isASCII) ? "\xef\xb7\xaf" : 
I8_to_native("\xf1\xbf\xaf\xaf"),
     0xFDF0      => (isASCII) ? "\xef\xb7\xb0" : 
I8_to_native("\xf1\xbf\xaf\xb0"),
 
@@ -523,8 +536,12 @@ for my $u (sort { utf8::unicode_to_native($a) <=> 
utf8::unicode_to_native($b) }
     my $display_flags = sprintf "0x%x", $this_utf8_flags;
     my $display_bytes = display_bytes($bytes);
     my $ret_ref = test_utf8n_to_uvchr($bytes, $len, $this_utf8_flags);
-    is($ret_ref->[0], $n, "Verify utf8n_to_uvchr($display_bytes, 
$display_flags) returns $hex_n");
-    is($ret_ref->[1], $len, "Verify utf8n_to_uvchr() for $hex_n returns 
expected length: $len");
+
+    # Rest of tests likely meaningless if it gets the wrong code point.
+    next unless is($ret_ref->[0], $n,
+       "Verify utf8n_to_uvchr($display_bytes, $display_flags) returns $hex_n");
+    is($ret_ref->[1], $len,
+       "Verify utf8n_to_uvchr() for $hex_n returns expected length: $len");
 
     unless (is(scalar @warnings, 0,
                "Verify utf8n_to_uvchr() for $hex_n generated no warnings"))
@@ -1221,6 +1238,9 @@ foreach my $test (@malformations) {
             $ret_should_be = 1;
             $comment = ", but need 2 bytes to discern:";
         }
+        elsif ($testname =~ /overlong/ && ! isASCII && $length == 3) {
+            # 3-byte overlongs on EBCDIC are determinable on the first byte
+        }
         elsif ($testname =~ /overlong/ && $length > 2) {
             if ($length <= 7 && $j < 2) {
                 $ret_should_be = 1;
@@ -1802,6 +1822,14 @@ foreach my $test (@tests) {
                         $ret_should_be = 1;
                         $comment .= ", but need 2 bytes to discern";
                     }
+                    elsif (   ! isASCII
+                           && $testname =~ /requires at least 32 bits/)
+                    {
+                        # On EBCDIC, the boundary between 31 and 32 bits is
+                        # more complicated.
+                        $ret_should_be = 1 if native_to_I8($partial) le
+                     
"\xFF\xA0\xA0\xA0\xA0\xA0\xA0\xA1\xBF\xBF\xBF\xBF\xBF\xBF";
+                    }
                 }
 
                 undef @warnings;
diff --git a/utf8.c b/utf8.c
index 7f8df9d..ec550c3 100644
--- a/utf8.c
+++ b/utf8.c
@@ -386,7 +386,7 @@ S_is_utf8_cp_above_31_bits(const U8 * const s, const U8 * 
const e)
     const U8 * const prefix = "\x41\x41\x41\x41\x41\x41\x42";
     const STRLEN prefix_len = sizeof(prefix) - 1;
     const STRLEN len = e - s;
-    const cmp_len = MIN(prefix_len, len - 1);
+    const STRLEN cmp_len = MIN(prefix_len, len - 1);
 
 #else
 
@@ -1046,9 +1046,7 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, 
STRLEN *retlen, U32 flags)
        }
 
        if (sv) {
-            outlier_ret = uv;   /* Note we don't bother to convert to native,
-                                   as all the outlier code points are the same
-                                   in both ASCII and EBCDIC */
+            outlier_ret = UNI_TO_NATIVE(uv);
            goto do_warn;
        }
 

--
Perl5 Master Repository

[perl.git] branch blead, updated. v5.25.5-56-g867a901

Reply via email to