jenkins-bot has submitted this change and it was merged.

Change subject: Add --forceParse UpdaterFlag and option in forceSearchIndex script
......................................................................
Add --forceParse UpdaterFlag and option in forceSearchIndex script Bug: T116381 Change-Id: I90889e448f02b7d6baa5302cc37630db74a743f1 --- M includes/Updater.php M maintenance/forceSearchIndex.php 2 files changed, 21 insertions(+), 4 deletions(-) Approvals: Cindy-the-browser-test-bot: Looks good to me, but someone else must approve EBernhardson: Looks good to me, approved jenkins-bot: Verified diff --git a/includes/Updater.php b/includes/Updater.php index 7b12fb5..24fdc5d 100644 --- a/includes/Updater.php +++ b/includes/Updater.php @@ -10,6 +10,7 @@ use MediaWiki\Logger\LoggerFactory; use MWTimestamp; use ParserCache; +use ParserOutput; use Sanitizer; use TextContent; use Title; @@ -41,6 +42,7 @@ const INDEX_ON_SKIP = 1; const SKIP_PARSE = 2; const SKIP_LINKS = 4; + const FORCE_PARSE = 8; /** * Full title text of pages updated in this process. Used for deduplication @@ -269,6 +271,7 @@ $indexOnSkip = $flags & self::INDEX_ON_SKIP; $skipParse = $flags & self::SKIP_PARSE; $skipLinks = $flags & self::SKIP_LINKS; + $forceParse = $flags & self::FORCE_PARSE; $fullDocument = !( $skipParse || $skipLinks ); $documents = array(); @@ -304,7 +307,10 @@ if ( !$skipParse ) { // Get text to index, based on content and parser output - list( $content, $parserOutput ) = $this->getContentAndParserOutput( $page ); + list( $content, $parserOutput ) = $this->getContentAndParserOutput( + $page, + $forceParse + ); // Build our page data $pageBuilder = new PageDataBuilder( $doc, $title, $content, $parserOutput ); @@ -358,13 +364,18 @@ * Fetch page's content and parser output, using the parser cache if we can * * @param WikiPage $page The wikipage to get output for + * @param int $forceParse Bypass ParserCache and force a fresh parse. 
* @return array(Content,ParserOutput) */ - private function getContentAndParserOutput( $page ) { + private function getContentAndParserOutput( $page, $forceParse ) { $content = $page->getContent(); $parserOptions = $page->makeParserOptions( 'canonical' ); - $parserOutput = ParserCache::singleton()->get( $page, $parserOptions ); - if ( !$parserOutput ) { + + if ( !$forceParse ) { + $parserOutput = ParserCache::singleton()->get( $page, $parserOptions ); + } + + if ( !isset( $parserOutput ) || !$parserOutput instanceof ParserOutput ) { // We specify the revision ID here. There might be a newer revision, // but we don't care because (a) we've already got a job somewhere // in the queue to index it, and (b) we want magic words like diff --git a/maintenance/forceSearchIndex.php b/maintenance/forceSearchIndex.php index 2ae5091..cf7cf0e 100644 --- a/maintenance/forceSearchIndex.php +++ b/maintenance/forceSearchIndex.php @@ -79,6 +79,7 @@ 'This replaces the contents of the index for that entry with the entry built from a skipped process.' . 'Without this if the entry does not exist then it will be skipped entirely. Only set this when running ' . 'the first pass of building the index. Otherwise, don\'t tempt fate by indexing half complete documents.' ); + $this->addOption( 'forceParse', 'Bypass ParserCache and do a fresh parse of pages from the Content.' ); $this->addOption( 'skipParse', 'Skip parsing the page. This is really only good for running the second half ' . 'of the two phase index build. If this is specified then the default batch size is actually 50.' ); $this->addOption( 'skipLinks', 'Skip looking for links to the page (counting and finding redirects). Use ' . @@ -133,6 +134,11 @@ if ( $this->getOption( 'skipLinks' ) ) { $updateFlags |= Updater::SKIP_LINKS; } + + if ( $this->getOption( 'forceParse' ) ) { + $updateFlags |= Updater::FORCE_PARSE; + } + $this->namespace = $this->hasOption( 'namespace' ) ? 
intval( $this->getOption( 'namespace' ) ) : null; -- To view, visit https://gerrit.wikimedia.org/r/248345 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I90889e448f02b7d6baa5302cc37630db74a743f1 Gerrit-PatchSet: 2 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: Aude <aude.w...@gmail.com> Gerrit-Reviewer: Chad <ch...@wikimedia.org> Gerrit-Reviewer: Cindy-the-browser-test-bot <bernhardsone...@gmail.com> Gerrit-Reviewer: DCausse <dcau...@wikimedia.org> Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: Manybubbles <never...@wikimedia.org> Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits