Aude has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/249036

Change subject: Add --forceParse UpdaterFlag and option in forceSearchIndex script
......................................................................

Add --forceParse UpdaterFlag and option in forceSearchIndex script

Bug: T116381
Change-Id: I90889e448f02b7d6baa5302cc37630db74a743f1
(cherry picked from commit f2616c9d4d5e835d061f40c338b55e207acaa623)
---
M includes/Updater.php
M maintenance/forceSearchIndex.php
2 files changed, 21 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/36/249036/1

diff --git a/includes/Updater.php b/includes/Updater.php
index 7b12fb5..24fdc5d 100644
--- a/includes/Updater.php
+++ b/includes/Updater.php
@@ -10,6 +10,7 @@
 use MediaWiki\Logger\LoggerFactory;
 use MWTimestamp;
 use ParserCache;
+use ParserOutput;
 use Sanitizer;
 use TextContent;
 use Title;
@@ -41,6 +42,7 @@
        const INDEX_ON_SKIP = 1;
        const SKIP_PARSE = 2;
        const SKIP_LINKS = 4;
+       const FORCE_PARSE = 8;
 
        /**
         * Full title text of pages updated in this process.  Used for 
deduplication
@@ -269,6 +271,7 @@
                $indexOnSkip = $flags & self::INDEX_ON_SKIP;
                $skipParse = $flags & self::SKIP_PARSE;
                $skipLinks = $flags & self::SKIP_LINKS;
+               $forceParse = $flags & self::FORCE_PARSE;
                $fullDocument = !( $skipParse || $skipLinks );
 
                $documents = array();
@@ -304,7 +307,10 @@
 
                        if ( !$skipParse ) {
                                // Get text to index, based on content and 
parser output
-                               list( $content, $parserOutput ) = 
$this->getContentAndParserOutput( $page );
+                               list( $content, $parserOutput ) = 
$this->getContentAndParserOutput(
+                                       $page,
+                                       $forceParse
+                               );
 
                                // Build our page data
                                $pageBuilder = new PageDataBuilder( $doc, 
$title, $content, $parserOutput );
@@ -358,13 +364,18 @@
         * Fetch page's content and parser output, using the parser cache if we 
can
         *
         * @param WikiPage $page The wikipage to get output for
+        * @param int $forceParse Bypass ParserCache and force a fresh parse.
         * @return array(Content,ParserOutput)
         */
-       private function getContentAndParserOutput( $page ) {
+       private function getContentAndParserOutput( $page, $forceParse ) {
                $content = $page->getContent();
                $parserOptions = $page->makeParserOptions( 'canonical' );
-               $parserOutput = ParserCache::singleton()->get( $page, 
$parserOptions );
-               if ( !$parserOutput ) {
+
+               if ( !$forceParse ) {
+                       $parserOutput = ParserCache::singleton()->get( $page, 
$parserOptions );
+               }
+
+               if ( !isset( $parserOutput ) || !$parserOutput instanceof 
ParserOutput ) {
                        // We specify the revision ID here. There might be a 
newer revision,
                        // but we don't care because (a) we've already got a 
job somewhere
                        // in the queue to index it, and (b) we want magic 
words like
diff --git a/maintenance/forceSearchIndex.php b/maintenance/forceSearchIndex.php
index 2ae5091..cf7cf0e 100644
--- a/maintenance/forceSearchIndex.php
+++ b/maintenance/forceSearchIndex.php
@@ -79,6 +79,7 @@
                        'This replaces the contents of the index for that entry 
with the entry built from a skipped process.' .
                        'Without this if the entry does not exist then it will 
be skipped entirely.  Only set this when running ' .
                        'the first pass of building the index.  Otherwise, 
don\'t tempt fate by indexing half complete documents.' );
+               $this->addOption( 'forceParse', 'Bypass ParserCache and do a 
fresh parse of pages from the Content.' );
                $this->addOption( 'skipParse', 'Skip parsing the page.  This is 
really only good for running the second half ' .
                        'of the two phase index build.  If this is specified 
then the default batch size is actually 50.' );
                $this->addOption( 'skipLinks', 'Skip looking for links to the 
page (counting and finding redirects).  Use ' .
@@ -133,6 +134,11 @@
                if ( $this->getOption( 'skipLinks' ) ) {
                        $updateFlags |= Updater::SKIP_LINKS;
                }
+
+               if ( $this->getOption( 'forceParse' ) ) {
+                       $updateFlags |= Updater::FORCE_PARSE;
+               }
+
                $this->namespace = $this->hasOption( 'namespace' ) ?
                        intval( $this->getOption( 'namespace' ) ) : null;
 

-- 
To view, visit https://gerrit.wikimedia.org/r/249036
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I90889e448f02b7d6baa5302cc37630db74a743f1
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: wmf/1.27.0-wmf.3
Gerrit-Owner: Aude <aude.w...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to