In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/1451f692e6e77e59f92339a6e76b0adb7cf0d828?hp=fb910f2b12bd712c5e59f73232256f47c5e3ea2c>
- Log ----------------------------------------------------------------- commit 1451f692e6e77e59f92339a6e76b0adb7cf0d828 Author: David Mitchell <[email protected]> Date: Fri Dec 16 13:07:58 2016 +0000 regexes: make scanning for ANYOF faster Given a character class of random chars (like [acgt], say, rather than predefined ones like [\d]), speed up the code in: 1) S_find_byclass(), which scans for the first char in the string that's in that class (e.g. /[acgt]...../), 2) S_regrepeat() which scans past all chars that are in that class (e.g. /....[acgt]+..../) by hoisting an unchanging test outside the main while loop. So this: while (s < end) { if (ANYOF_FLAGS(node)) match = reginclass(*s, ...); else match = ANYOF_BITMAP_TEST(*s, ...); ... } becomes this: if (ANYOF_FLAGS(node)) { while (s < end) { match = reginclass(*s, ...); ... } } else { while (s < end) { match = ANYOF_BITMAP_TEST(*s, ...); ... } } The average of the 3 tests added to t/perf/benchmarks by this commit shows this change (raw numbers, lower is better): before after -------- -------- Ir 3294.0 2763.0 Dr 900.7 802.3 Dw 356.0 390.0 COND 569.0 436.7 IND 11.0 11.0 COND_m 1.2 2.0 IND_m 7.3 7.3 ----------------------------------------------------------------------- Summary of changes: regexec.c | 15 ++++++++++++--- t/perf/benchmarks | 17 +++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/regexec.c b/regexec.c index 013ccc54a8..f6f293d56e 100644 --- a/regexec.c +++ b/regexec.c @@ -1881,8 +1881,11 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, REXEC_FBC_UTF8_CLASS_SCAN( reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target)); } + else if (ANYOF_FLAGS(c)) { + REXEC_FBC_CLASS_SCAN(reginclass(prog,c, (U8*)s, (U8*)s+1, 0)); + } else { - REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s, 0)); + REXEC_FBC_CLASS_SCAN(ANYOF_BITMAP_TEST(c, *((U8*)s))); } break; @@ -8892,8 +8895,14 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, scan += UTF8SKIP(scan); 
hardcount++; } - } else { - while (scan < loceol && REGINCLASS(prog, p, (U8*)scan, 0)) + } + else if (ANYOF_FLAGS(p)) { + while (scan < loceol + && reginclass(prog, p, (U8*)scan, (U8*)scan+1, 0)) + scan++; + } + else { + while (scan < loceol && ANYOF_BITMAP_TEST(p, *((U8*)scan))) scan++; } break; diff --git a/t/perf/benchmarks b/t/perf/benchmarks index 4a57175e1d..ac698500e3 100644 --- a/t/perf/benchmarks +++ b/t/perf/benchmarks @@ -1348,4 +1348,21 @@ setup => 'my $i = 0;', code => 'while (++$i % 4) {}', }, + + + 'regex::anyof_plus::anchored' => { + desc => '/^[acgt]+/', + setup => '$_ = "a" x 100;', + code => '/^[acgt]+/', + }, + 'regex::anyof_plus::floating' => { + desc => '/[acgt]+where match starts at position 0 for 100 chars/', + setup => '$_ = "a" x 100;', + code => '/[acgt]+/', + }, + 'regex::anyof_plus::floating_away' => { + desc => '/[acgt]+/ where match starts at position 100 for 100 chars', + setup => '$_ = ("0" x 100) . ("a" x 100);', + code => '/[acgt]+/', + }, ]; -- Perl5 Master Repository
