MaxSem has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/331572 )

Change subject: getFirstChars(): don't use quantifiers with user-supplied count
......................................................................

getFirstChars(): don't use quantifiers with user-supplied count

Bug: T143178
Change-Id: Iba6d929156040f5388461aaf075644d8fbf647be
---
M includes/ExtractFormatter.php
M tests/phpunit/ExtractFormatterTest.php
2 files changed, 9 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/TextExtracts refs/changes/72/331572/1

diff --git a/includes/ExtractFormatter.php b/includes/ExtractFormatter.php
index 3f1c4af..307f2c7 100644
--- a/includes/ExtractFormatter.php
+++ b/includes/ExtractFormatter.php
@@ -120,9 +120,9 @@
                if ( $length <= $requestedLength ) {
                        return $text;
                }
-               $pattern = "#^.{{$requestedLength}}[\\w/]*>?#su";
-               preg_match( $pattern, $text, $m );
-               return $m[0];
+               $pattern = "#^[\\w/]*>?#su";
+               preg_match( $pattern, mb_substr( $text, $requestedLength ), $m );
+               return mb_substr( $text, 0, $requestedLength ) . $m[0];
        }
 
        /**
diff --git a/tests/phpunit/ExtractFormatterTest.php b/tests/phpunit/ExtractFormatterTest.php
index c633fb3..037a430 100644
--- a/tests/phpunit/ExtractFormatterTest.php
+++ b/tests/phpunit/ExtractFormatterTest.php
@@ -139,6 +139,9 @@
 
        public function provideGetFirstChars() {
                $text = 'Lullzy lulz are lullzy!';
+               $longText = str_repeat( 'тест ', 50000 );
+               $longTextExpected = trim( str_repeat( 'тест ', 13108 ) );
+
                return [
                        // [ $text, 0, '' ],
                        [ $text, 100, $text ],
@@ -146,6 +149,9 @@
                        [ $text, 6, 'Lullzy' ],
                        // [ $text, 7, 'Lullzy' ],
                        [ $text, 8, 'Lullzy lulz' ],
+                       // T143178 - previously, characters were extracted using regexps which failed when
+                       // requesting 64K chars or more.
+                       [ $longText, 65536, $longTextExpected ],
                ];
        }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/331572
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iba6d929156040f5388461aaf075644d8fbf647be
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/TextExtracts
Gerrit-Branch: master
Gerrit-Owner: MaxSem <maxsem.w...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to