EBernhardson has uploaded a new change for review. https://gerrit.wikimedia.org/r/325328
Change subject: Add job queue option for initImageData maintenance script ...................................................................... Add job queue option for initImageData maintenance script Trying to run this script in the cluster fatals out due to memory problems somewhat regularly. The --start option helps to restart it where it fell down, but when trying to run against hundreds of wikis that is a one-off solution that makes ensuring everything is actually visited a pain. To try to isolate errors, add an option to push the parsing into the job queue. There is still the possibility to miss pages, but job queue retries should take care of us for the most part. Attempts to keep load down on the databases by making sure no more than a specified number of jobs are queued/processing at a given time. Bug: T152155 Change-Id: I3a4e3a415b2f03de0bb36ac0515241e950130fde --- M extension.json A includes/Job/InitImageDataJob.php M maintenance/initImageData.php 3 files changed, 45 insertions(+), 5 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/PageImages refs/changes/28/325328/1 diff --git a/extension.json b/extension.json index 4e74d66..f78c44a 100644 --- a/extension.json +++ b/extension.json @@ -18,7 +18,8 @@ "ApiQueryPageImages": "includes/ApiQueryPageImages.php", "PageImages": "includes/PageImages.php", "PageImages\\Hooks\\LinksUpdateHookHandler": "includes/LinksUpdateHookHandler.php", - "PageImages\\Hooks\\ParserFileProcessingHookHandlers": "includes/ParserFileProcessingHookHandlers.php" + "PageImages\\Hooks\\ParserFileProcessingHookHandlers": "includes/ParserFileProcessingHookHandlers.php", + "PageImages\\Job\\InitImageDataJob": "includes/Job/InitImageDataJob.php" }, "Hooks": { "ParserMakeImageParams": "PageImages\\Hooks\\ParserFileProcessingHookHandlers::onParserMakeImageParams", @@ -29,6 +30,9 @@ "AfterParserFetchFileAndTitle": "PageImages\\Hooks\\ParserFileProcessingHookHandlers::onAfterParserFetchFileAndTitle", 
"SpecialMobileEditWatchlist::images": "PageImages::onSpecialMobileEditWatchlist_images" }, + "JobClasses": { + "InitImageDataJob": "PageImages\\Job\\InitImageDataJob" + }, "config": { "PageImagesScores": { "value": { diff --git a/includes/Job/InitImageDataJob.php b/includes/Job/InitImageDataJob.php new file mode 100644 index 0000000..c35adf8 --- /dev/null +++ b/includes/Job/InitImageDataJob.php @@ -0,0 +1,17 @@ +<?php + +namespace PageImages\Job; + +use MediaWiki\MediaWikiServices; +use RefreshLinks; + +class InitImageDataJob extends Job { + public function run() { + $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); + + foreach ( $this->params['page_ids'] as $id ) { + RefreshLinks::fixLinksFromArticle( $id ); + $lbFactory->waitForReplication(); + } + } +} diff --git a/maintenance/initImageData.php b/maintenance/initImageData.php index af3453d..0463056 100644 --- a/maintenance/initImageData.php +++ b/maintenance/initImageData.php @@ -7,6 +7,7 @@ require_once ( "$IP/maintenance/Maintenance.php" ); use MediaWiki\MediaWikiServices; +use PageImages\Job\InitImageDataJob; /** * @license WTFPL 2.0 @@ -21,6 +22,7 @@ $this->addOption( 'earlier-than', 'Run only on pages earlier than this timestamp', false, true ); $this->addOption( 'start', 'Starting page ID', false, true ); + $this->addOption( 'queue-pressure', 'Maximum number of jobs to enqueue at a time. 
If unprovided or 0 will be run in-process.', false, true ); $this->setBatchSize( 100 ); } @@ -28,7 +30,11 @@ global $wgPageImagesNamespaces; $id = $this->getOption( 'start', 0 ); - $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); + $queue = null; + $maxPressure = $this->getOption( 'queue-pressure', 0 ); + if ( $maxPressure > 0 ) { + $queue = JobQueueGroup::singleton(); + } do { $tables = [ 'page', 'imagelinks' ]; @@ -57,15 +63,28 @@ [ 'LIMIT' => $this->mBatchSize, 'ORDER_BY' => 'page_id', 'GROUP BY' => 'page_id' ], $joinConds ); + $page_ids = []; foreach ( $res as $row ) { - $id = $row->page_id; - RefreshLinks::fixLinksFromArticle( $id ); - $lbFactory->waitForReplication(); + $page_ids[] = $row->page_id; + } + $job = new InitImageDataJob( Title::newMainPage(), [ 'page_ids' => $page_ids ] ); + if ( $queue === null ) { + $job->run(); + } else { + $queue->push( $job ); + do { + sleep(1); + } while ( $this->getJobPressure( $queue ) >= $maxPressure ); } $this->output( "$id\n" ); } while ( $res->numRows() ); $this->output( "done\n" ); } + + private function getJobPressure( $queue ) { + $group = $queue->get( 'InitImageDataJob' ); + return $group->getSize() + $group->getAcquiredCount(); + } } $maintClass = 'InitImageData'; -- To view, visit https://gerrit.wikimedia.org/r/325328 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I3a4e3a415b2f03de0bb36ac0515241e950130fde Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/PageImages Gerrit-Branch: master Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits