In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/1451f692e6e77e59f92339a6e76b0adb7cf0d828?hp=fb910f2b12bd712c5e59f73232256f47c5e3ea2c>

- Log -----------------------------------------------------------------
commit 1451f692e6e77e59f92339a6e76b0adb7cf0d828
Author: David Mitchell <[email protected]>
Date:   Fri Dec 16 13:07:58 2016 +0000

    regexes: make scanning for ANYOF faster
    
    Given a character class of arbitrary chars (e.g. [acgt], rather than a
    predefined one like [\d]), speed up the code in:
    
    1) S_find_byclass(), which scans for the first char in the string that's
       in that class (e.g. /[acgt]...../),
    2) S_regrepeat() which scans past all chars that are in that class
       (e.g. /....[acgt]+..../)
    
    by hoisting an unchanging test outside the main while loop. So this:
    
        while (s < end) {
            if (ANYOF_FLAGS(node))
                match = reginclass(*s, ...);
            else
                match = ANYOF_BITMAP_TEST(*s, ...);
            ...
        }
    
    becomes this:
    
        if (ANYOF_FLAGS(node)) {
            while (s < end) {
                match = reginclass(*s, ...);
                ...
            }
        }
        else {
            while (s < end) {
                match = ANYOF_BITMAP_TEST(*s, ...);
                ...
            }
        }
    
    The average of the 3 tests added to t/perf/benchmarks by this commit
    shows this change (raw numbers, lower is better):
    
             before    after
           -------- --------
        Ir   3294.0   2763.0
        Dr    900.7    802.3
        Dw    356.0    390.0
      COND    569.0    436.7
       IND     11.0     11.0
    
    COND_m      1.2      2.0
     IND_m      7.3      7.3
-----------------------------------------------------------------------

Summary of changes:
 regexec.c         | 15 ++++++++++++---
 t/perf/benchmarks | 17 +++++++++++++++++
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/regexec.c b/regexec.c
index 013ccc54a8..f6f293d56e 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1881,8 +1881,11 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
             REXEC_FBC_UTF8_CLASS_SCAN(
                       reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
         }
+        else if (ANYOF_FLAGS(c)) {
+            REXEC_FBC_CLASS_SCAN(reginclass(prog,c, (U8*)s, (U8*)s+1, 0));
+        }
         else {
-            REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s, 0));
+            REXEC_FBC_CLASS_SCAN(ANYOF_BITMAP_TEST(c, *((U8*)s)));
         }
         break;
 
@@ -8892,8 +8895,14 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
                scan += UTF8SKIP(scan);
                hardcount++;
            }
-       } else {
-           while (scan < loceol && REGINCLASS(prog, p, (U8*)scan, 0))
+       }
+        else if (ANYOF_FLAGS(p)) {
+           while (scan < loceol
+                    && reginclass(prog, p, (U8*)scan, (U8*)scan+1, 0))
+               scan++;
+        }
+        else {
+           while (scan < loceol && ANYOF_BITMAP_TEST(p, *((U8*)scan)))
                scan++;
        }
        break;
diff --git a/t/perf/benchmarks b/t/perf/benchmarks
index 4a57175e1d..ac698500e3 100644
--- a/t/perf/benchmarks
+++ b/t/perf/benchmarks
@@ -1348,4 +1348,21 @@
         setup   => 'my $i = 0;',
         code    => 'while (++$i % 4) {}',
     },
+
+
+    'regex::anyof_plus::anchored' => {
+        desc    => '/^[acgt]+/',
+        setup   => '$_ = "a" x 100;',
+        code    => '/^[acgt]+/',
+    },
+    'regex::anyof_plus::floating' => {
+        desc    => '/[acgt]+where match starts at position 0 for 100 chars/',
+        setup   => '$_ = "a" x 100;',
+        code    => '/[acgt]+/',
+    },
+    'regex::anyof_plus::floating_away' => {
+        desc    => '/[acgt]+/ where match starts at position 100 for 100 chars',
+        setup   => '$_ = ("0" x 100) . ("a" x 100);',
+        code    => '/[acgt]+/',
+    },
 ];

--
Perl5 Master Repository

Reply via email to