In perl.git, the branch blead has been updated

<https://perl5.git.perl.org/perl.git/commitdiff/1385ac98c5f75358978bb05c2d6c4134413cf689?hp=25d7b7aa379d33ce2e8fe3e2bef4206b35739bc5>

- Log -----------------------------------------------------------------
commit 1385ac98c5f75358978bb05c2d6c4134413cf689
Author: David Mitchell <da...@iabyn.com>
Date:   Fri Mar 22 17:38:48 2019 +0000

    avoid leak assigning regexp to non-COW string
    
    In something like
    
        $s = substr(.....); # $s now a non-COW SvPOK() SV
        $r = qr/..../;
        $s = $$r;
    
    $s's previous string buffer would leak when an SVt_REGEXP type SV is
    assigned to it.
    
    Worse, if $s was an SVt_PVPV, it would fail an assert on debugging
    builds.
    
    The fix is to make sure any remaining stringy stuff is cleaned up
    before copying the REGEXP.

commit 803bd7c91c63f8f263bed592a33b10cf69f567cf
Author: David Mitchell <da...@iabyn.com>
Date:   Fri Mar 22 15:43:56 2019 +0000

    fix leak in BEGIN { threads->new(...) }
    
    Normally by the time we reach perl_destruct(), PL_parser should be null
    due to having its original (null) value restored by SAVEt_PARSER during
    leaving scope (usually before run-time starts in fact).  But if a thread
    is created within a BEGIN block, the parser is duped, but the
    SAVEt_PARSER savestack entry isn't. So PL_parser never gets cleaned up.
    Clean it up in perl_destruct() instead. This is a bit of a hack.

commit 75bb5aa48dfcf930533cd069393fc8a45e4ece18
Author: David Mitchell <da...@iabyn.com>
Date:   Fri Mar 22 12:31:57 2019 +0000

    fix leak in cloned regexes.
    
    When a regex is cloned for a new thread, the string buffer (which holds
    the text of the original pattern) wasn't being freed because SvLEN was
    being set to 0.
    
    For example:
    
        use threads;
        my $r = qr/abc/;
        threads->new( sub { 1; })->join;
    
    In the new thread, $r is cloned  but when the thread exits, the string
    buffer holding "(?^:abc)" was leaking.
    
    This was broken by v5.27.2-30-gdf6b4bd565.
    
    The problem was that in the cloned SV, the buffer was copied, but the
    SvLEN(sv) was left set at zero, which along with the SVf_FAKE, mader it
    look like the buffer was alien and so not freed.
    
    SvLEN was 0 in the parent thread's $r, since $r and its compile-time
    prototype share the same string buffer (so only the original SV has
    SvLEN > 0 - all the copies - within the same thread - have mother_re
    pointing to the original).
    
    When REs are cloned into another thread, mother_re isn't preserved,
    so each RE has its own copy of the buffer.

-----------------------------------------------------------------------

Summary of changes:
 perl.c    | 15 +++++++++++++++
 regcomp.c | 21 +++++++++++++++++++++
 t/op/qr.t | 34 +++++++++++++++++++++++++++++++++-
 3 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/perl.c b/perl.c
index cdefa99018..1ef425bb25 100644
--- a/perl.c
+++ b/perl.c
@@ -668,6 +668,21 @@ perl_destruct(pTHXx)
     FREETMPS;
     assert(PL_scopestack_ix == 0);
 
+    /* normally when we get here, PL_parser should be null due to having
+     * its original (null) value restored by SAVEt_PARSER during leaving
+     * scope (usually before run-time starts in fact).
+     * But if a thread is created within a BEGIN block, the parser is
+     * duped, but the SAVEt_PARSER savestack entry isn't. So PL_parser
+     * never gets cleaned up.
+     * Clean it up here instead. This is a bit of a hack.
+     */
+    if (PL_parser) {
+        /* stop parser_free() stomping on PL_curcop */
+        PL_parser->saved_curcop = PL_curcop;
+        parser_free(PL_parser);
+    }
+
+
     /* Need to flush since END blocks can produce output */
     /* flush stdout separately, since we can identify it */
 #ifdef USE_PERLIO
diff --git a/regcomp.c b/regcomp.c
index 547b9113e3..e13da83673 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -20665,7 +20665,23 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
     if (!dsv)
        dsv = (REGEXP*) newSV_type(SVt_REGEXP);
     else {
+        assert(SvTYPE(dsv) == SVt_REGEXP || (SvTYPE(dsv) == SVt_PVLV));
+
+        /* our only valid caller, sv_setsv_flags(), should have done
+         * a SV_CHECK_THINKFIRST_COW_DROP() by now */
+        assert(!SvOOK(dsv));
+        assert(!SvIsCOW(dsv));
+        assert(!SvROK(dsv));
+
+        if (SvPVX_const(dsv)) {
+            if (SvLEN(dsv))
+                Safefree(SvPVX(dsv));
+            SvPVX(dsv) = NULL;
+        }
+        SvLEN_set(dsv, 0);
+        SvCUR_set(dsv, 0);
        SvOK_off((SV *)dsv);
+
        if (islv) {
            /* For PVLVs, the head (sv_any) points to an XPVLV, while
              * the LV's xpvlenu_rx will point to a regexp body, which
@@ -20956,6 +20972,11 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP 
*dstr, CLONE_PARAMS *param)
               2: something we no longer hold a reference on
               so we need to copy it locally.  */
     RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED_const(sstr), SvCUR(sstr)+1);
+    /* set malloced length to a non-zero value so it will be freed
+     * (otherwise in combination with SVf_FAKE it looks like an alien
+     * buffer). It doesn't have to be the actual malloced size, since it
+     * should never be grown */
+    SvLEN_set(dstr, SvCUR(sstr)+1);
     ret->mother_re   = NULL;
 }
 #endif /* PERL_IN_XSUB_RE */
diff --git a/t/op/qr.t b/t/op/qr.t
index 32b9e3b23b..e03a465430 100644
--- a/t/op/qr.t
+++ b/t/op/qr.t
@@ -7,7 +7,7 @@ BEGIN {
     require './test.pl';
 }
 
-plan(tests => 34);
+plan(tests => 37);
 
 sub r {
     return qr/Good/;
@@ -135,3 +135,35 @@ sub {
     };
 }
 pass("PVLV-as-REGEXP double-free of PVX");
+
+# a non-cow SVPV leaked it's string buffer when a REGEXP was assigned to
+# it. Give valgrind/ASan something to work on
+{
+    my $s = substr("ab",0,1); # generate a non-COW string
+    my $r1 = qr/x/;
+    $s = $$r1; # make sure "a" isn't leaked
+    pass("REGEXP leak");
+
+    my $dest = 0;
+    sub Foo99::DESTROY { $dest++ }
+
+    # ditto but make sure we don't leak a reference
+    {
+        my $ref = bless [], "Foo99";
+        my $r2 = qr/x/;
+        $ref = $$r2;
+    }
+    is($dest, 1, "REGEXP RV leak");
+
+    # and worse, assigning a REGEXP to an PVLV that had a string value
+    # caused an assert failure. Same code, but using $_[0] which is an
+    # lvalue, rather than $s.
+
+    my %h;
+    sub {
+        $_[0] = substr("ab",0,1); # generate a non-COW string
+        my $r = qr/x/;
+        $_[0] = $$r; # make sure "a" isn't leaked
+    }->($h{foo}); # passes PVLV to sub
+    is($h{foo}, "(?^:x)", "REGEXP PVLV leak");
+}

-- 
Perl5 Master Repository

Reply via email to