In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/99543f15a007c8878501777ac5d75c1bbaef03b4?hp=f67abcc4495468f94ded423c7788a7356ea8584b>
- Log ----------------------------------------------------------------- commit 99543f15a007c8878501777ac5d75c1bbaef03b4 Author: Father Chrysostomos <spr...@cpan.org> Date: Thu Nov 1 14:49:35 2012 -0700 Fix /a++(?{})+$code_block/ This I would expect: $ perl5.16.0 -wMre=eval -e '$x = "(?{})"; /a++(?{})+$x/x' (?{})+ matches null string many times in regex; marked by <-- HERE in m/a++(?{})+ <-- HERE (?{})/ at -e line 1. Use of uninitialized value $_ in pattern match (m//) at -e line 1. It warns, but it still runs. This I would not, $ perl5.17.5 -wMre=eval -e '$x = "(?{})"; /a++(?{})+$x/x' Nested quantifiers in regex; marked by <-- HERE in m/a++ + <-- HERE (?{})/ at (eval 1) line 1. were it not for the fact that I know how it works. :-) To compile the blocks in $x without recompiling the blocks directly inside /.../, the regexp compiler blanks out the âouterâ blocks with spaces, and compiles qr'a++ +(?{})'x. But /x can see through those spaces, resulting in a change in behaviour. So use under- scores instead. M regcomp.c M t/re/rxcode.t commit 24044fabd839afe7d01d53cc50f06415298d31ec Author: Father Chrysostomos <spr...@cpan.org> Date: Thu Nov 1 13:08:17 2012 -0700 Donât leak with /(?{})$invalid_code_block/ This script was leaking: $ ./perl -Ilib -wMre=eval -e '$x = "(?{+})"; while(1){eval {/(?{})$x/}}' The mallocked array that is allocated before compilation to hold the code blocks was not being freed before the syntax error from the inner pattern ($x) was propagated. M regcomp.c commit 2032cc0cf0dc385ead62c081c08e0a66c2150481 Author: Father Chrysostomos <spr...@cpan.org> Date: Thu Nov 1 06:19:28 2012 -0700 Free detritus when croaking with /(?{})$invalid/ This script was leaking: $ ./miniperl -e 'warn $$; $x = ")"; while( 1){ eval { /(?{})$x/ }; }' The mallocked array that is allocated before compilation to hold the code blocks was not being protected properly around the first pass of compilation. M regcomp.c commit 2ac1304871ec6cab968bd70b187c30f52d230288 Author: Father Chrysostomos <spr...@cpan.org> Date: Wed Oct 31 10:02:03 2012 -0700 Stop run-time regexp blocks from leaking regexps This was leaking like a sieve: $var = '(?{})'; /stuff$var/; When a run-time regular expression has code blocks in it, those are compiled separately inside their own qr thingy (see S_compile_runtime_code in regcomp.c). In re_op_compile, the newly-compiled code blocks are stored in pRExC_state->code_blocks, which is a mallocked array. That array also holds reference counts on the regular expressions from which the code blocks derive their existence. When the whole regular expression is compiled, the code blocks are fetched from that array, and the new regular expression ends up holding a reference count on those code blockâs originating regular expressions. The reference counts that pRExC_state->code_blocks had were not low- ered when pRExC_state->code_blocks was freed, except for qr/stuff$var/ (because the qr// would take ownership of those reference counts, which would be lowered when the outer qr// itself was freed). M regcomp.c M t/op/svleak.t commit ddac780e51876b4ded3e018ba6117a0f0f69dfe1 Author: Father Chrysostomos <spr...@cpan.org> Date: Tue Oct 30 16:41:27 2012 -0700 Stop / $looks_like_block/ from leaking If an interpolated string looks as though it contains a regexp code block, the regexp compiler will evaluate it inside qr'...' and then extract the code blocks from the resulting regexp object. If it turned out to be a false positive (e.g., "[(?{})]"), then the code to handle this returned without freeing the temporary reg- exp object. M regcomp.c M t/op/svleak.t ----------------------------------------------------------------------- Summary of changes: regcomp.c | 31 ++++++++++++++++++++++++++++--- t/op/svleak.t | 17 ++++++++++++++++- t/re/rxcode.t | 9 ++++++++- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/regcomp.c b/regcomp.c index 7114355..4979b6a 100644 --- a/regcomp.c +++ b/regcomp.c @@ -5005,7 +5005,7 @@ S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, OP *expr, * * becomes * - * qr'a\\bc def\'ghi\\\\jkl(?{"this is runtime"})mno' + * qr'a\\bc_______________________def\'ghi\\\\jkl(?{"this is runtime"})mno' * * After eval_sv()-ing that, grab any new code blocks from the returned qr * and merge them with any code blocks of the original regexp. @@ -5058,7 +5058,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, /* blank out literal code block */ assert(pat[s] == '('); while (s <= pRExC_state->code_blocks[n].end) { - *p++ = ' '; + *p++ = '_'; s++; } s--; @@ -5096,7 +5096,10 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, qr_ref = POPs; PUTBACK; if (SvTRUE(ERRSV)) + { + Safefree(pRExC_state->code_blocks); Perl_croak(aTHX_ "%s", SvPVx_nolen_const(ERRSV)); + } assert(SvROK(qr_ref)); qr = SvRV(qr_ref); assert(SvTYPE(qr) == SVt_REGEXP && RX_ENGINE((REGEXP*)qr)->op_comp); @@ -5129,7 +5132,10 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, int i1 = 0, i2 = 0; if (!r2->num_code_blocks) /* we guessed wrong */ + { + SvREFCNT_dec(qr); return 1; + } Newx(new_block, r1->num_code_blocks + r2->num_code_blocks, @@ -5286,6 +5292,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, I32 minlen = 0; U32 rx_flags; SV * VOL pat; + SV * VOL code_blocksv = NULL; /* these are all flags - maybe they should be turned * into a single int with different bit masks */ @@ -5805,11 +5812,23 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_lastnum=0; RExC_lastparse=NULL; ); + /* reg may croak on us, not giving us a chance to free + pRExC_state->code_blocks. We cannot SAVEFREEPV it now, as we may + need it to survive as long as the regexp (qr/(?{})/). + We must check that code_blocksv is not already set, because we may + have longjmped back. */ + if (pRExC_state->code_blocks && !code_blocksv) { + code_blocksv = newSV_type(SVt_PV); + SAVEFREESV(code_blocksv); + SvPV_set(code_blocksv, (char *)pRExC_state->code_blocks); + SvLEN_set(code_blocksv, 1); /*sufficient to make sv_clear free it*/ + } if (reg(pRExC_state, 0, &flags,1) == NULL) { RExC_precomp = NULL; - Safefree(pRExC_state->code_blocks); return(NULL); } + if (code_blocksv) + SvLEN_set(code_blocksv,0); /* no you can't have it, sv_clear */ /* Here, finished first pass. Get rid of any added setjmp */ if (used_setjump) { @@ -5867,7 +5886,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, ri->num_code_blocks = pRExC_state->num_code_blocks; } else + { + int n; + for (n = 0; n < pRExC_state->num_code_blocks; n++) + if (pRExC_state->code_blocks[n].src_regex) + SAVEFREESV(pRExC_state->code_blocks[n].src_regex); SAVEFREEPV(pRExC_state->code_blocks); + } { bool has_p = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY); diff --git a/t/op/svleak.t b/t/op/svleak.t index b5bd1c1..a705587 100644 --- a/t/op/svleak.t +++ b/t/op/svleak.t @@ -15,7 +15,7 @@ BEGIN { use Config; -plan tests => 38; +plan tests => 42; # run some code N times. If the number of SVs at the end of loop N is # greater than (N-1)*delta at the end of loop 1, we've got a leak @@ -282,3 +282,18 @@ leak(2, 0, sub { my @a; eval { push @a, $die_on_fetch }; }, 'pushing exploding scalar does not leak'); + + +# Run-time regexp code blocks +{ + use re 'eval'; + my @tests = ('[(?{})]','(?{})'); + for my $t (@tests) { + leak(2, 0, sub { + / $t/; + }, "/ \$x/ where \$x is $t does not leak"); + leak(2, 0, sub { + /(?{})$t/; + }, "/(?{})\$x/ where \$x is $t does not leak"); + } +} diff --git a/t/re/rxcode.t b/t/re/rxcode.t index eb144f9..16bc4b7 100644 --- a/t/re/rxcode.t +++ b/t/re/rxcode.t @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan tests => 38; +plan tests => 39; $^R = undef; like( 'a', qr/^a(?{1})(?:b(?{2}))?/, 'a =~ ab?' ); @@ -84,3 +84,10 @@ cmp_ok( scalar(@var), '==', 0, '..still nothing pushed (package)' ); ok( 'abbb' =~ /^a(?{36})(?:b(?{37})|c(?{38}))+/, 'abbbb =~ a(?:b|c)+' ); ok( $^R == 37, '$^R == 37' ) or print "# \$^R=$^R\n"; } + +# Broken temporarily by the jumbo re-eval rewrite in 5.17.1; fixed in .6 +{ + use re 'eval'; + $x = "(?{})"; + is eval { "a" =~ /a++(?{})+$x/x } || $@, '1', '/a++(?{})+$code_block/' +} -- Perl5 Master Repository