In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/1385ac98c5f75358978bb05c2d6c4134413cf689?hp=25d7b7aa379d33ce2e8fe3e2bef4206b35739bc5>
- Log ----------------------------------------------------------------- commit 1385ac98c5f75358978bb05c2d6c4134413cf689 Author: David Mitchell <da...@iabyn.com> Date: Fri Mar 22 17:38:48 2019 +0000 avoid leak assigning regexp to non-COW string In something like $s = substr(.....); # $s now a non-COW SvPOK() SV $r = qr/..../; $s = $$r; $s's previous string buffer would leak when an SVt_REGEXP type SV is assigned to it. Worse, if $s was an SVt_PVPV, it would fail an assert on debugging builds. The fix is to make sure any remaining stringy stuff is cleaned up before copying the REGEXP. commit 803bd7c91c63f8f263bed592a33b10cf69f567cf Author: David Mitchell <da...@iabyn.com> Date: Fri Mar 22 15:43:56 2019 +0000 fix leak in BEGIN { threads->new(...) } Normally by the time we reach perl_destruct(), PL_parser should be null due to having its original (null) value restored by SAVEt_PARSER during leaving scope (usually before run-time starts in fact). But if a thread is created within a BEGIN block, the parser is duped, but the SAVEt_PARSER savestack entry isn't. So PL_parser never gets cleaned up. Clean it up in perl_destruct() instead. This is a bit of a hack. commit 75bb5aa48dfcf930533cd069393fc8a45e4ece18 Author: David Mitchell <da...@iabyn.com> Date: Fri Mar 22 12:31:57 2019 +0000 fix leak in cloned regexes. When a regex is cloned for a new thread, the string buffer (which holds the text of the original pattern) wasn't being freed because SvLEN was being set to 0. For example: use threads; my $r = qr/abc/; threads->new( sub { 1; })->join; In the new thread, $r is cloned but when the thread exits, the string buffer holding "(?^:abc)" was leaking. This was broken by v5.27.2-30-gdf6b4bd565. The problem was that in the cloned SV, the buffer was copied, but the SvLEN(sv) was left set at zero, which along with the SVf_FAKE, mader it look like the buffer was alien and so not freed. SvLEN was 0 in the parent thread's $r, since $r and its compile-time prototype share the same string buffer (so only the original SV has SvLEN > 0 - all the copies - within the same thread - have mother_re pointing to the original). When REs are cloned into another thread, mother_re isn't preserved, so each RE has its own copy of the buffer. ----------------------------------------------------------------------- Summary of changes: perl.c | 15 +++++++++++++++ regcomp.c | 21 +++++++++++++++++++++ t/op/qr.t | 34 +++++++++++++++++++++++++++++++++- 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/perl.c b/perl.c index cdefa99018..1ef425bb25 100644 --- a/perl.c +++ b/perl.c @@ -668,6 +668,21 @@ perl_destruct(pTHXx) FREETMPS; assert(PL_scopestack_ix == 0); + /* normally when we get here, PL_parser should be null due to having + * its original (null) value restored by SAVEt_PARSER during leaving + * scope (usually before run-time starts in fact). + * But if a thread is created within a BEGIN block, the parser is + * duped, but the SAVEt_PARSER savestack entry isn't. So PL_parser + * never gets cleaned up. + * Clean it up here instead. This is a bit of a hack. + */ + if (PL_parser) { + /* stop parser_free() stomping on PL_curcop */ + PL_parser->saved_curcop = PL_curcop; + parser_free(PL_parser); + } + + /* Need to flush since END blocks can produce output */ /* flush stdout separately, since we can identify it */ #ifdef USE_PERLIO diff --git a/regcomp.c b/regcomp.c index 547b9113e3..e13da83673 100644 --- a/regcomp.c +++ b/regcomp.c @@ -20665,7 +20665,23 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv) if (!dsv) dsv = (REGEXP*) newSV_type(SVt_REGEXP); else { + assert(SvTYPE(dsv) == SVt_REGEXP || (SvTYPE(dsv) == SVt_PVLV)); + + /* our only valid caller, sv_setsv_flags(), should have done + * a SV_CHECK_THINKFIRST_COW_DROP() by now */ + assert(!SvOOK(dsv)); + assert(!SvIsCOW(dsv)); + assert(!SvROK(dsv)); + + if (SvPVX_const(dsv)) { + if (SvLEN(dsv)) + Safefree(SvPVX(dsv)); + SvPVX(dsv) = NULL; + } + SvLEN_set(dsv, 0); + SvCUR_set(dsv, 0); SvOK_off((SV *)dsv); + if (islv) { /* For PVLVs, the head (sv_any) points to an XPVLV, while * the LV's xpvlenu_rx will point to a regexp body, which @@ -20956,6 +20972,11 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param) 2: something we no longer hold a reference on so we need to copy it locally. */ RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED_const(sstr), SvCUR(sstr)+1); + /* set malloced length to a non-zero value so it will be freed + * (otherwise in combination with SVf_FAKE it looks like an alien + * buffer). It doesn't have to be the actual malloced size, since it + * should never be grown */ + SvLEN_set(dstr, SvCUR(sstr)+1); ret->mother_re = NULL; } #endif /* PERL_IN_XSUB_RE */ diff --git a/t/op/qr.t b/t/op/qr.t index 32b9e3b23b..e03a465430 100644 --- a/t/op/qr.t +++ b/t/op/qr.t @@ -7,7 +7,7 @@ BEGIN { require './test.pl'; } -plan(tests => 34); +plan(tests => 37); sub r { return qr/Good/; @@ -135,3 +135,35 @@ sub { }; } pass("PVLV-as-REGEXP double-free of PVX"); + +# a non-cow SVPV leaked it's string buffer when a REGEXP was assigned to +# it. Give valgrind/ASan something to work on +{ + my $s = substr("ab",0,1); # generate a non-COW string + my $r1 = qr/x/; + $s = $$r1; # make sure "a" isn't leaked + pass("REGEXP leak"); + + my $dest = 0; + sub Foo99::DESTROY { $dest++ } + + # ditto but make sure we don't leak a reference + { + my $ref = bless [], "Foo99"; + my $r2 = qr/x/; + $ref = $$r2; + } + is($dest, 1, "REGEXP RV leak"); + + # and worse, assigning a REGEXP to an PVLV that had a string value + # caused an assert failure. Same code, but using $_[0] which is an + # lvalue, rather than $s. + + my %h; + sub { + $_[0] = substr("ab",0,1); # generate a non-COW string + my $r = qr/x/; + $_[0] = $$r; # make sure "a" isn't leaked + }->($h{foo}); # passes PVLV to sub + is($h{foo}, "(?^:x)", "REGEXP PVLV leak"); +} -- Perl5 Master Repository