MaxSem has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/331572 )
Change subject: getFirstChars(): don't use quantifiers with user-supplied count ...................................................................... getFirstChars(): don't use quantifiers with user-supplied count Bug: T143178 Change-Id: Iba6d929156040f5388461aaf075644d8fbf647be --- M includes/ExtractFormatter.php M tests/phpunit/ExtractFormatterTest.php 2 files changed, 9 insertions(+), 3 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/TextExtracts refs/changes/72/331572/1 diff --git a/includes/ExtractFormatter.php b/includes/ExtractFormatter.php index 3f1c4af..307f2c7 100644 --- a/includes/ExtractFormatter.php +++ b/includes/ExtractFormatter.php @@ -120,9 +120,9 @@ if ( $length <= $requestedLength ) { return $text; } - $pattern = "#^.{{$requestedLength}}[\\w/]*>?#su"; - preg_match( $pattern, $text, $m ); - return $m[0]; + $pattern = "#^[\\w/]*>?#su"; + preg_match( $pattern, mb_substr( $text, $requestedLength ), $m ); + return mb_substr( $text, 0, $requestedLength ) . $m[0]; } /** diff --git a/tests/phpunit/ExtractFormatterTest.php b/tests/phpunit/ExtractFormatterTest.php index c633fb3..037a430 100644 --- a/tests/phpunit/ExtractFormatterTest.php +++ b/tests/phpunit/ExtractFormatterTest.php @@ -139,6 +139,9 @@ public function provideGetFirstChars() { $text = 'Lullzy lulz are lullzy!'; + $longText = str_repeat( 'тест ', 50000 ); + $longTextExpected = trim( str_repeat( 'тест ', 13108 ) ); + return [ // [ $text, 0, '' ], [ $text, 100, $text ], @@ -146,6 +149,9 @@ [ $text, 6, 'Lullzy' ], // [ $text, 7, 'Lullzy' ], [ $text, 8, 'Lullzy lulz' ], + // T143178 - previously, characters were extracted using regexps which failed when + // requesting 64K chars or more. + [ $longText, 65536, $longTextExpected ], ]; } } -- To view, visit https://gerrit.wikimedia.org/r/331572 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Iba6d929156040f5388461aaf075644d8fbf647be Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/TextExtracts Gerrit-Branch: master Gerrit-Owner: MaxSem <maxsem.w...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits