In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/4690a2e02d47daf03446be6bc0143d8aa16bdb9f?hp=675c73ca278d0bfeffeeb2a3f4cdea82e8b8b8c4>
- Log ----------------------------------------------------------------- commit 4690a2e02d47daf03446be6bc0143d8aa16bdb9f Author: Karl Williamson <k...@cpan.org> Date: Fri Aug 19 14:17:42 2016 -0600 perldelta: Updates for 5.24 This constitutes the changes that khw made that he thinks warrant mention in perldelta. M pod/perldelta.pod commit c8247c27c13d1cf152398e453793a91916d2185d Author: Karl Williamson <k...@cpan.org> Date: Fri Aug 19 14:07:53 2016 -0600 Encode: revert commit that introduces security holes This reverts a portion of commit 0f33e03c7e91f63bcd07b5ddfc00101715fa1fc0 which introduces some security holes in checking for UTF-8 malformations. In particular, it allows overflow in non-strict mode, and overlongs in either mode. See discussion at https://github.com/dankogai/p5-encode/issues/64 This reversion is to make sure that we don't release even a development version with known security holes. A final disposition is still to be determined M Porting/Maintainers.pl M cpan/Encode/Encode.xs M t/porting/customized.dat ----------------------------------------------------------------------- Summary of changes: Porting/Maintainers.pl | 1 + cpan/Encode/Encode.xs | 50 ++++++++++++------------------------------------ pod/perldelta.pod | 18 +++++++++++++++++ t/porting/customized.dat | 1 + 4 files changed, 32 insertions(+), 38 deletions(-) diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl index a4e477a..1a59e09 100755 --- a/Porting/Maintainers.pl +++ b/Porting/Maintainers.pl @@ -408,6 +408,7 @@ use File::Glob qw(:case); 'Encode' => { 'DISTRIBUTION' => 'DANKOGAI/Encode-2.86.tar.gz', 'FILES' => q[cpan/Encode], + 'CUSTOMIZED' => [ qw[ Encode.xs ] ], }, 'encoding::warnings' => { diff --git a/cpan/Encode/Encode.xs b/cpan/Encode/Encode.xs index 6b4fae9..222f39b 100644 --- a/cpan/Encode/Encode.xs +++ b/cpan/Encode/Encode.xs @@ -318,39 +318,6 @@ strict_utf8(pTHX_ SV* sv) return SvTRUE(*svp); } -/* - * https://github.com/dankogai/p5-encode/pull/56#issuecomment-231959126 - */ -#ifndef UNICODE_IS_NONCHAR -#define UNICODE_IS_NONCHAR(c) ((c >= 0xFDD0 && c <= 0xFDEF) || (c & 0xFFFE) == 0xFFFE) -#endif - -static UV -convert_utf8_multi_seq(U8* s, STRLEN len, bool strict) -{ - UV uv; - - if (strict && len > 4) - return 0; - - uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(len); - - len--; - s++; - - while (len--) { - if (!UTF8_IS_CONTINUATION(*s)) - return 0; - uv = UTF8_ACCUMULATE(uv, *s); - s++; - } - - if (strict && (UNICODE_IS_SURROGATE(uv) || UNICODE_IS_NONCHAR(uv) || uv > PERL_UNICODE_MAX)) - return 0; - - return uv; -} - static U8* process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, bool encode, bool strict, bool stop_at_partial) @@ -399,12 +366,19 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, goto malformed_byte; } - ulen = skip; - uv = convert_utf8_multi_seq(s, skip, strict); - if (uv == 0) { + uv = utf8n_to_uvuni(s, e - s, &ulen, + UTF8_CHECK_ONLY | (strict ? UTF8_ALLOW_STRICT : + UTF8_ALLOW_NONSTRICT) + ); +#if 1 /* perl-5.8.6 and older do not check UTF8_ALLOW_LONG */ + if (strict && uv > PERL_UNICODE_MAX) + ulen = (STRLEN) -1; +#endif + if (ulen == (STRLEN) -1) { if (strict) { - uv = convert_utf8_multi_seq(s, skip, 0); - if (uv == 0) + uv = utf8n_to_uvuni(s, e - s, &ulen, + UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT); + if (ulen == (STRLEN) -1) goto malformed_byte; goto malformed; } diff --git a/pod/perldelta.pod b/pod/perldelta.pod index d1fae22..c182040 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -411,6 +411,24 @@ floating point numbers (64-bit or 128-bit) and the x86 80-bit point literals will give a warning about "exponent underflow". [perl #128843, #128889, #128890, #128893, #128909, #128919] +=item * + +A regression in 5.24 with C<tr/\N{U+...}/foo/> when the code point was between +128 and 255 has been fixed. [perl #128734]. + +=item * + +A regression from the previous development release, 5.23.3, where +compiling a regular expression could crash the interpreter has been +fixed. [perl #128686]. + +=item * + +Use of a string delimiter whose code point is above 2**32 is now +supported on platforms that allow this. Note that this is non-portable, +and is based on Perl's extension to UTF-8, and is probably not +displayable nor enterable by any editor. [perl #128738] + =back =head1 Known Problems diff --git a/t/porting/customized.dat b/t/porting/customized.dat index b24e5ce..8129fac 100644 --- a/t/porting/customized.dat +++ b/t/porting/customized.dat @@ -105,3 +105,4 @@ bignum cpan/bignum/lib/bigrat.pm b8fcffd8e60bfa9f32ccb9ab8c0fa5726d6392f8 bignum cpan/bignum/lib/Math/BigFloat/Trace.pm 1ec133b0c03687fd621cc35946c465c66e38127a bignum cpan/bignum/lib/Math/BigInt/Trace.pm 3e1cc7726c55f9d5f4db6e5ec41c5fd266fcb289 version cpan/version/lib/version.pm a032a751524bdd07a93c945d2a1703abe7ad8ef0 +Encode cpan/Encode/Encode.xs dba310bf3d362b1ade421b1a741875511d84809a -- Perl5 Master Repository