jenkins-bot has submitted this change and it was merged. Change subject: Improve MultiStringReplacer performance ......................................................................
Improve MultiStringReplacer performance Don't call MultiStringMatcher::searchIn, because that way we end up having to walk the results array again to construct the replacement matches array. Instead, perform the search in MultiStringReplacer itself. Change-Id: I2101f9764c4ddb8f4f7d2b2d90368a9ca04c6862 --- M .gitignore M bench/bench.php M src/MultiStringReplacer.php 3 files changed, 14 insertions(+), 9 deletions(-) Approvals: Ori.livneh: Looks good to me, approved jenkins-bot: Verified diff --git a/.gitignore b/.gitignore index f6f384e..4f51d1a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ /doc/html vendor/ composer.lock +bench/23835-0.txt +bench/ZhConversion.php diff --git a/bench/bench.php b/bench/bench.php index 7562be1..b537025 100644 --- a/bench/bench.php +++ b/bench/bench.php @@ -6,11 +6,12 @@ use AhoCorasick\MultiStringMatcher; if ( !file_exists( __DIR__ . '/23835-0.txt' ) ) { - die( 'Please download http://www.gutenberg.org/files/23835/23835-0.txt' ); + die( "Please download http://www.gutenberg.org/files/23835/23835-0.txt\n" ); } if ( !file_exists( __DIR__ . '/ZhConversion.php' ) ) { - die( 'You need ZhConversion.php, from http://git.io/vIMst' ); + die( "You need ZhConversion.php, from " . + "https://github.com/wikimedia/mediawiki/blob/master/includes/ZhConversion.php\n" ); } require_once __DIR__ . '/ZhConversion.php'; diff --git a/src/MultiStringReplacer.php b/src/MultiStringReplacer.php index ee1ace0..ab3ebc4 100644 --- a/src/MultiStringReplacer.php +++ b/src/MultiStringReplacer.php @@ -72,14 +72,16 @@ * @endcode */ public function searchAndReplace( $text ) { - if ( !$this->searchKeywords || $text === '' ) { - return $text; - } - + $state = 0; + $length = strlen( $text ); $matches = array(); - foreach ( $this->searchIn( $text ) as $result ) { - list( $offset, $match ) = $result; - $matches[$offset] = $match; + for ( $i = 0; $i < $length; $i++ ) { + $ch = $text[$i]; + $state = $this->nextState( $state, $ch ); + foreach ( $this->outputs[$state] as $match ) { + $offset = $i - $this->searchKeywords[$match] + 1; + $matches[$offset] = $match; + } } ksort( $matches ); -- To view, visit https://gerrit.wikimedia.org/r/218121 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I2101f9764c4ddb8f4f7d2b2d90368a9ca04c6862 Gerrit-PatchSet: 2 Gerrit-Project: AhoCorasick Gerrit-Branch: master Gerrit-Owner: Ori.livneh <o...@wikimedia.org> Gerrit-Reviewer: Ori.livneh <o...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits