In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/109ac342a6bc5a3a67c3b52341607100cedafdf7?hp=3781748131a087d117c33ad25b5211eb3c33afff>
- Log ----------------------------------------------------------------- commit 109ac342a6bc5a3a67c3b52341607100cedafdf7 Author: Karl Williamson <k...@cpan.org> Date: Sat Aug 27 19:16:17 2016 -0600 PATCH: [perl #129038] Crash with s///l The cause of this was bad logic. It thought it was dealing with UTF-8 when it wasn't. M regexec.c M t/re/subst.t commit ce66b506fa280c2ede0b0d4a3e81b53d0e31cb48 Author: Karl Williamson <k...@cpan.org> Date: Sun Aug 28 09:39:38 2016 -0600 perlinterp: Use 'e.g' not 'i.e.' for 'for example' M pod/perlinterp.pod ----------------------------------------------------------------------- Summary of changes: pod/perlinterp.pod | 2 +- regexec.c | 27 +++++++++++++++------------ t/re/subst.t | 19 ++++++++++++++++++- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/pod/perlinterp.pod b/pod/perlinterp.pod index e1af333..00a7b9d 100644 --- a/pod/perlinterp.pod +++ b/pod/perlinterp.pod @@ -655,7 +655,7 @@ And here is the function from F<op.c>: 18 return o; 19 } -One particularly interesting aspect is that if the op has no kids (i.e., +One particularly interesting aspect is that if the op has no kids (e.g., C<readline()> or C<< <> >>) the op is freed and replaced with an entirely new one that references C<*ARGV> (lines 12-16). diff --git a/regexec.c b/regexec.c index fad8876..b86cb1b 100644 --- a/regexec.c +++ b/regexec.c @@ -6341,23 +6341,26 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), (U8) nextchr)))) { sayNO; } + + locinput++; + break; } - else if (UTF8_IS_DOWNGRADEABLE_START(nextchr)) { - if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), - EIGHT_BIT_UTF8_TO_NATIVE(nextchr, - *(locinput + 1)))))) - { - sayNO; - } - } - else { /* Here, must be an above Latin-1 code point */ + + if (! UTF8_IS_DOWNGRADEABLE_START(nextchr)) { /* An above Latin-1 code point */ _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput, reginfo->strend); goto utf8_posix_above_latin1; } - /* Here, must be utf8 */ - locinput += UTF8SKIP(locinput); - break; + /* Here is a UTF-8 variant code point below 256 and the target is + * UTF-8 */ + if (! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), + EIGHT_BIT_UTF8_TO_NATIVE(nextchr, + *(locinput + 1)))))) + { + sayNO; + } + + goto increment_locinput; case NPOSIXD: /* \W or [:^punct:] etc. under /d */ to_complement = 1; diff --git a/t/re/subst.t b/t/re/subst.t index 2de1a7b..d32e7b8 100644 --- a/t/re/subst.t +++ b/t/re/subst.t @@ -11,7 +11,7 @@ BEGIN { require './loc_tools.pl'; } -plan( tests => 270 ); +plan( tests => 271 ); $_ = 'david'; $a = s/david/rules/r; @@ -1102,3 +1102,20 @@ SKIP: { $s =~ s/..\G//g; is($s, "\x{123}", "#RT 126260 gofs"); } + +SKIP: { + if (! locales_enabled('LC_CTYPE')) { + skip "Can't test locale", 1; + } + + # To cause breakeage, we need a locale in which \xff matches whatever + # POSIX class is used in the pattern. Easiest is C, with \W. + fresh_perl_is(' use POSIX qw(locale_h); + setlocale(&POSIX::LC_CTYPE, "C"); + my $s = "\xff"; + $s =~ s/\W//l; + print qq(ok$s\n)', + "ok\n", + {stderr => 1 }, + '[perl #129038 ] s/\xff//l no longer crashes'); +} -- Perl5 Master Repository