In perl.git, the branch smoke-me/scream2 has been updated <http://perl5.git.perl.org/perl.git/commitdiff/c70de939cce5ba6b4bfaeefe3981e59c1cb7c6f6?hp=a63b65adef6d72b629770779e5b25623af96d787>
- Log ----------------------------------------------------------------- commit c70de939cce5ba6b4bfaeefe3981e59c1cb7c6f6 Author: Nicholas Clark <[email protected]> Date: Mon Jun 27 21:13:39 2011 +0200 Tidy code in pp_study and Perl_screaminstr() In pp_study eliminate the variable pos, which duplicates len. In Perl_screaminstr(), move the declarations of s and x to their point of use, convert a for loop to a while loop, and avoid incrementing and decrementing s. M pp.c M util.c commit 9f31a5cac5f2994757bc9ecf7e1eb422809338ba Author: Nicholas Clark <[email protected]> Date: Mon Jun 27 20:51:04 2011 +0200 Store C<study>'s data in in mg_ptr instead of interpreter variables. This allows more than one C<study> to be active at the same time. It eliminates PL_screamfirst, PL_lastscream, PL_maxscream. M embedvar.h M ext/Devel-Peek/t/Peek.t M intrpvar.h M perl.c M pod/perldelta.pod M pod/perlfunc.pod M pp.c M regexec.c M sv.c M util.c commit 2d4acb0634981d648424605f921527d9e00fc630 Author: Nicholas Clark <[email protected]> Date: Mon Jun 27 17:58:10 2011 +0200 Merge PL_scream{first,next} into one allocated buffer. Effectively, PL_screamnext is now PL_screamfirst + 256. The actual interpreter variable PL_screamnext is eliminated. M embedvar.h M intrpvar.h M perl.c M pp.c M sv.c M util.c ----------------------------------------------------------------------- Summary of changes: embedvar.h | 8 ------ ext/Devel-Peek/t/Peek.t | 36 ++++++++++++++++++++---------- intrpvar.h | 6 +---- perl.c | 8 ------ pod/perldelta.pod | 6 +++++ pod/perlfunc.pod | 5 +-- pp.c | 55 ++++++++++++++++------------------------------ regexec.c | 7 +++++- sv.c | 6 ----- util.c | 25 ++++++++++++++------ 10 files changed, 75 insertions(+), 87 deletions(-) diff --git a/embedvar.h b/embedvar.h index a540fd6..2405ee5 100644 --- a/embedvar.h +++ b/embedvar.h @@ -171,7 +171,6 @@ #define PL_last_swash_tmps (vTHX->Ilast_swash_tmps) #define PL_lastfd (vTHX->Ilastfd) #define PL_lastgotoprobe (vTHX->Ilastgotoprobe) -#define PL_lastscream (vTHX->Ilastscream) #define PL_laststatval (vTHX->Ilaststatval) #define PL_laststype (vTHX->Ilaststype) #define PL_localizing (vTHX->Ilocalizing) @@ -187,7 +186,6 @@ #define PL_markstack_ptr (vTHX->Imarkstack_ptr) #define PL_max_intro_pending (vTHX->Imax_intro_pending) #define PL_maxo (vTHX->Imaxo) -#define PL_maxscream (vTHX->Imaxscream) #define PL_maxsysfd (vTHX->Imaxsysfd) #define PL_memory_debug_header (vTHX->Imemory_debug_header) #define PL_mess_sv (vTHX->Imess_sv) @@ -268,8 +266,6 @@ #define PL_scopestack_ix (vTHX->Iscopestack_ix) #define PL_scopestack_max (vTHX->Iscopestack_max) #define PL_scopestack_name (vTHX->Iscopestack_name) -#define PL_screamfirst (vTHX->Iscreamfirst) -#define PL_screamnext (vTHX->Iscreamnext) #define PL_secondgv (vTHX->Isecondgv) #define PL_sharehook (vTHX->Isharehook) #define PL_sig_pending (vTHX->Isig_pending) @@ -505,7 +501,6 @@ #define PL_Ilast_swash_tmps PL_last_swash_tmps #define PL_Ilastfd PL_lastfd #define PL_Ilastgotoprobe PL_lastgotoprobe -#define PL_Ilastscream PL_lastscream #define PL_Ilaststatval PL_laststatval #define PL_Ilaststype PL_laststype #define PL_Ilocalizing PL_localizing @@ -521,7 +516,6 @@ #define PL_Imarkstack_ptr PL_markstack_ptr #define PL_Imax_intro_pending PL_max_intro_pending #define PL_Imaxo PL_maxo -#define PL_Imaxscream PL_maxscream #define PL_Imaxsysfd PL_maxsysfd #define PL_Imemory_debug_header PL_memory_debug_header #define PL_Imess_sv PL_mess_sv @@ -602,8 +596,6 @@ #define PL_Iscopestack_ix PL_scopestack_ix #define PL_Iscopestack_max PL_scopestack_max #define PL_Iscopestack_name PL_scopestack_name -#define PL_Iscreamfirst PL_screamfirst -#define PL_Iscreamnext PL_screamnext #define PL_Isecondgv PL_secondgv #define PL_Isharehook PL_sharehook #define PL_Isig_pending PL_sig_pending diff --git a/ext/Devel-Peek/t/Peek.t b/ext/Devel-Peek/t/Peek.t index 5a007af..642d34c 100644 --- a/ext/Devel-Peek/t/Peek.t +++ b/ext/Devel-Peek/t/Peek.t @@ -857,17 +857,14 @@ unless ($Config{useithreads}) { do_test('regular string constant', beer, 'SV = PV\\($ADDR\\) at $ADDR - REFCNT = 5 + REFCNT = 6 FLAGS = \\(PADMY,POK,READONLY,pPOK\\) PV = $ADDR "foamy"\\\0 CUR = 5 LEN = \d+ '); - is(study beer, 1, "Our studies were successful"); - - do_test('string constant now studied', beer, -'SV = PVMG\\($ADDR\\) at $ADDR + my $want = 'SV = PVMG\\($ADDR\\) at $ADDR REFCNT = 6 FLAGS = \\(PADMY,SMG,POK,READONLY,pPOK,SCREAM\\) IV = 0 @@ -878,22 +875,37 @@ unless ($Config{useithreads}) { MAGIC = $ADDR MG_VIRTUAL = &PL_vtbl_regexp MG_TYPE = PERL_MAGIC_study\\(G\\) -'); + MG_LEN = 1044 + MG_PTR = $ADDR "\\\\377\\\\377\\\\377\\\\377.*" +'; + + is(study beer, 1, "Our studies were successful"); + + do_test('string constant now studied', beer, $want); is (eval 'index "not too foamy", beer', 8, 'correct index'); - do_test('string constant still studied', beer, -'SV = PVMG\\($ADDR\\) at $ADDR - REFCNT = 6 - FLAGS = \\(PADMY,SMG,POK,READONLY,pPOK,SCREAM\\) + do_test('string constant still studied', beer, $want); + + my $pie = 'good'; + + is(study $pie, 1, "Our studies were successful"); + + do_test('string constant still studied', beer, $want); + + do_test('second string also studied', $pie, 'SV = PVMG\\($ADDR\\) at $ADDR + REFCNT = 1 + FLAGS = \\(PADMY,SMG,POK,pPOK,SCREAM\\) IV = 0 NV = 0 - PV = $ADDR "foamy"\\\0 - CUR = 5 + PV = $ADDR "good"\\\0 + CUR = 4 LEN = \d+ MAGIC = $ADDR MG_VIRTUAL = &PL_vtbl_regexp MG_TYPE = PERL_MAGIC_study\\(G\\) + MG_LEN = 1040 + MG_PTR = $ADDR "\\\\377\\\\377\\\\377\\\\377.*" '); } diff --git a/intrpvar.h b/intrpvar.h index 9dda6a3..cb8a861 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -155,10 +155,6 @@ PERLVAR(Iefloatsize, STRLEN) /* regex stuff */ -PERLVAR(Iscreamfirst, I32 *) -PERLVAR(Iscreamnext, I32 *) -PERLVAR(Ilastscream, SV *) - PERLVAR(Ireg_state, struct re_save_state) PERLVAR(Iregdummy, regnode) /* from regcomp.c */ @@ -233,7 +229,7 @@ When you replace this variable, it is considered a good practice to store the po PERLVARI(Iopfreehook, Perl_ophook_t, 0) /* op_free() hook */ -PERLVARI(Imaxscream, I32, -1) +/* Space for U32 */ PERLVARI(Ireginterp_cnt,I32, 0) /* Whether "Regexp" was interpolated. */ PERLVARI(Iwatchaddr, char **, 0) PERLVAR(Iwatchok, char *) diff --git a/perl.c b/perl.c index 417b2fd..e345ae1 100644 --- a/perl.c +++ b/perl.c @@ -905,14 +905,6 @@ perl_destruct(pTHXx) /* defgv, aka *_ should be taken care of elsewhere */ - /* clean up after study() */ - SvREFCNT_dec(PL_lastscream); - PL_lastscream = NULL; - Safefree(PL_screamfirst); - PL_screamfirst = 0; - Safefree(PL_screamnext); - PL_screamnext = 0; - /* float buffer */ Safefree(PL_efloatbuf); PL_efloatbuf = NULL; diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 6383796..2bdf44c 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -42,6 +42,12 @@ the built-in C<read> and C<recv> functions (among others) parse their arguments. This means that one can override the built-in functions with custom subroutines that parse their arguments the same way. +=head2 You can now C<study> more than one string + +The restriction that you can only have one C<study> active at a time has been +removed. You can now usefully C<study> as many strings as you want (until you +exhaust memory). + =head1 Security XXX Any security-related notices go here. In particular, any security diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index e1453e9..936d1c0 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -6756,9 +6756,8 @@ patterns you are searching and the distribution of character frequencies in the string to be searched; you probably want to compare run times with and without it to see which is faster. Those loops that scan for many short constant strings (including the constant -parts of more complex patterns) will benefit most. You may have only -one C<study> active at a time: if you study a different scalar the first -is "unstudied". (The way C<study> works is this: a linked list of every +parts of more complex patterns) will benefit most. +(The way C<study> works is this: a linked list of every character in the string to be searched is made, so we know, for example, where all the C<'k'> characters are. From each search string, the rarest character is selected, based on some static frequency tables diff --git a/pp.c b/pp.c index 61e9dc1..3a5a2a7 100644 --- a/pp.c +++ b/pp.c @@ -707,16 +707,15 @@ PP(pp_study) { dVAR; dSP; dPOPss; register unsigned char *s; - register I32 pos; register I32 ch; register I32 *sfirst; register I32 *snext; STRLEN len; + MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_study) : NULL; + + if (mg && SvSCREAM(sv)) + RETPUSHYES; - if (sv == PL_lastscream) { - if (SvSCREAM(sv)) - RETPUSHYES; - } s = (unsigned char*)(SvPV(sv, len)); if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) { /* No point in studying a zero length string, and not safe to study @@ -729,47 +728,31 @@ PP(pp_study) */ RETPUSHNO; } - pos = len; - if (PL_lastscream) { - SvSCREAM_off(PL_lastscream); - SvREFCNT_dec(PL_lastscream); - } - PL_lastscream = SvREFCNT_inc_simple(sv); + Newx(sfirst, 256 + len, I32); - if (pos > PL_maxscream) { - if (PL_maxscream < 0) { - PL_maxscream = pos + 80; - Newx(PL_screamfirst, 256, I32); - Newx(PL_screamnext, PL_maxscream, I32); - } - else { - PL_maxscream = pos + pos / 4; - Renew(PL_screamnext, PL_maxscream, I32); - } - } - - sfirst = PL_screamfirst; - snext = PL_screamnext; - - if (!sfirst || !snext) + if (!sfirst) DIE(aTHX_ "do_study: out of memory"); + SvSCREAM_on(sv); + if (!mg) + mg = sv_magicext(sv, NULL, PERL_MAGIC_study, &PL_vtbl_regexp, NULL, 0); + mg->mg_ptr = (char *) sfirst; + mg->mg_len = (256 + len) * sizeof(I32); + + snext = sfirst; for (ch = 256; ch; --ch) - *sfirst++ = -1; - sfirst -= 256; + *snext++ = -1; - while (--pos >= 0) { - register const I32 ch = s[pos]; + while (len-- > 0) { + register const I32 ch = s[len]; if (sfirst[ch] >= 0) - snext[pos] = sfirst[ch]; + snext[len] = sfirst[ch]; else - snext[pos] = -1; - sfirst[ch] = pos; + snext[len] = -1; + sfirst[ch] = (I32)len; } - SvSCREAM_on(sv); - sv_magic(sv, NULL, PERL_MAGIC_study, NULL, 0); RETPUSHYES; } diff --git a/regexec.c b/regexec.c index 7f845a4..be048fd 100644 --- a/regexec.c +++ b/regexec.c @@ -695,8 +695,13 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos, if (flags & REXEC_SCREAM) { I32 p = -1; /* Internal iterator of scream. */ I32 * const pp = data ? data->scream_pos : &p; + const MAGIC *mg; - if (PL_screamfirst[BmRARE(check)] != -1 + assert(SvMAGICAL(sv)); + mg = mg_find(sv, PERL_MAGIC_study); + assert(mg); + + if (((I32 *)mg->mg_ptr)[BmRARE(check)] != -1 || ( BmRARE(check) == '\n' && (BmPREVIOUS(check) == SvCUR(check) - 1) && SvTAIL(check) )) diff --git a/sv.c b/sv.c index 445f9d4..fffa6e9 100644 --- a/sv.c +++ b/sv.c @@ -12994,12 +12994,6 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* regex stuff */ - PL_screamfirst = NULL; - PL_screamnext = NULL; - PL_maxscream = -1; /* reinits on demand */ - PL_lastscream = NULL; - - PL_regdummy = proto_perl->Iregdummy; PL_colorset = 0; /* reinits PL_colors[] */ /*PL_colors[6] = {0,0,0,0,0,0};*/ diff --git a/util.c b/util.c index 5741f1c..02ecac7 100644 --- a/util.c +++ b/util.c @@ -861,14 +861,23 @@ Perl_screaminstr(pTHX_ SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift register I32 stop_pos; register const unsigned char *littleend; I32 found = 0; + const MAGIC * mg; + I32 *screamfirst; + I32 *screamnext; PERL_ARGS_ASSERT_SCREAMINSTR; + assert(SvMAGICAL(bigstr)); + mg = mg_find(bigstr, PERL_MAGIC_study); + assert(mg); assert(SvTYPE(littlestr) == SVt_PVMG); assert(SvVALID(littlestr)); + screamfirst = (I32 *)mg->mg_ptr; + screamnext = screamfirst + 256; + pos = *old_posp == -1 - ? PL_screamfirst[BmRARE(littlestr)] : PL_screamnext[*old_posp]; + ? screamfirst[BmRARE(littlestr)] : screamnext[*old_posp]; if (pos == -1) { cant_find: if ( BmRARE(littlestr) == '\n' @@ -901,20 +910,20 @@ Perl_screaminstr(pTHX_ SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift return NULL; } while (pos < previous + start_shift) { - pos = PL_screamnext[pos]; + pos = screamnext[pos]; if (pos == -1) goto cant_find; } big -= previous; do { - register const unsigned char *s, *x; if (pos >= stop_pos) break; if (big[pos] == first) { - for (x=big+pos+1,s=little; s < littleend; /**/ ) { - if (*s++ != *x++) { - s--; + const unsigned char *s = little; + const unsigned char *x = big + pos +1; + while (s < littleend) { + if (*s != *x++) break; - } + ++s; } if (s == littleend) { *old_posp = pos; @@ -922,7 +931,7 @@ Perl_screaminstr(pTHX_ SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift found = 1; } } - pos = PL_screamnext[pos]; + pos = screamnext[pos]; } while (pos != -1); if (last && found) return (char *)(big+(*old_posp)); -- Perl5 Master Repository
