Daniel Kinzler has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/377046 )

Change subject: Allow batch sizes for different jobs to be defined separately.
......................................................................

Allow batch sizes for different jobs to be defined separately.

In particular, the batch size for UpdateHtmlCacheJob can be quite large,
while the batch size for the slow-running RefreshLinksJob should be
rather small.

Bug: T173710
Change-Id: Ic095e2eba985ea3a6e51645d0be58589b326f218
---
M client/config/WikibaseClient.default.php
M client/includes/Changes/InjectRCRecordsJob.php
M client/includes/Changes/WikiPageUpdater.php
M client/includes/WikibaseClient.php
M client/tests/phpunit/includes/Changes/InjectRCRecordsJobTest.php
M docs/options.wiki
6 files changed, 88 insertions(+), 30 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase 
refs/changes/46/377046/1

diff --git a/client/config/WikibaseClient.default.php 
b/client/config/WikibaseClient.default.php
index f75c05c..ca7f007 100644
--- a/client/config/WikibaseClient.default.php
+++ b/client/config/WikibaseClient.default.php
@@ -96,8 +96,23 @@
                                $settings->getSetting( 'hasFullEntityIdColumn' 
) : true;
                },
 
-               // Database batch size in WikiPageUpdater which ChangeHandler 
uses
-               'wikiPageUpdaterDbBatchSize' => 50,
+               // Batch size for UpdateHtmlCacheJob
+               'purgeCacheBatchSize' => function ( SettingsArray $settings ) {
+                       return $settings->hasSetting( 
'wikiPageUpdaterDbBatchSize' ) ?
+                               $settings->getSetting( 
'wikiPageUpdaterDbBatchSize' ) : 100;
+               },
+
+               // Batch size for InjectRCRecordsJob
+               'recentChangesBatchSize' => function ( SettingsArray $settings 
) {
+                       return $settings->hasSetting( 
'wikiPageUpdaterDbBatchSize' ) ?
+                               $settings->getSetting( 
'wikiPageUpdaterDbBatchSize' ) : 100;
+               },
+
+               // Batch size for RefreshLinksJobs
+               'refreshLinksBatchSize' => function ( SettingsArray $settings ) 
{
+                       return $settings->hasSetting( 
'wikiPageUpdaterDbBatchSize' ) ?
+                               $settings->getSetting( 
'wikiPageUpdaterDbBatchSize' ) : 5;
+               },
        ];
 
        // Some defaults depend on information not available at this time.
diff --git a/client/includes/Changes/InjectRCRecordsJob.php 
b/client/includes/Changes/InjectRCRecordsJob.php
index dd220ed..ded5067 100644
--- a/client/includes/Changes/InjectRCRecordsJob.php
+++ b/client/includes/Changes/InjectRCRecordsJob.php
@@ -68,21 +68,18 @@
        private $stats = null;
 
        /**
-        * @var int Batch size for database operations
-        */
-       private $dbBatchSize = 100;
-
-       /**
         * @param Title[] $titles
         * @param EntityChange $change
         * @param array $rootJobParams
+        * @param int $batchSize
         *
         * @return JobSpecification
         */
        public static function makeJobSpecification(
                array $titles,
                EntityChange $change,
-               array $rootJobParams = []
+               array $rootJobParams = [],
+               $batchSize = 100
        ) {
                $pages = [];
 
@@ -99,7 +96,8 @@
                // See JobQueueChangeNotificationSender::getJobSpecification 
for relevant root job parameters.
                $params = array_merge( $rootJobParams, [
                        'change' => $changeData,
-                       'pages' => $pages
+                       'pages' => $pages,
+                       'batchSize' => $batchSize,
                ] );
 
                return new JobSpecification(
@@ -190,11 +188,11 @@
        }
 
        /**
-        * @param int $dbBatchSize
+        * @return int
         */
-       public function setDbBatchSize( $dbBatchSize ) {
-               Assert::parameterType( 'integer', $dbBatchSize, '$dbBatchSize' 
);
-               $this->dbBatchSize = $dbBatchSize;
+       private function getBatchSize() {
+               $params = $this->getParams();
+               return isset( $params['batchSize'] ) ? intval( 
$params['batchSize'] ) : 100;
        }
 
        /**
@@ -286,7 +284,7 @@
                                $rc->save();
                        }
 
-                       if ( ++$c >= $this->dbBatchSize ) {
+                       if ( ++$c >= $this->getBatchSize() ) {
                                $this->lbFactory->commitAndWaitForReplication( 
__METHOD__, $trxToken );
                                $trxToken = 
$this->lbFactory->getEmptyTransactionTicket( __METHOD__ );
                                $c = 0;
diff --git a/client/includes/Changes/WikiPageUpdater.php 
b/client/includes/Changes/WikiPageUpdater.php
index 6e66fcb..363cd73 100644
--- a/client/includes/Changes/WikiPageUpdater.php
+++ b/client/includes/Changes/WikiPageUpdater.php
@@ -10,7 +10,6 @@
 use Wikibase\Client\RecentChanges\RecentChangeFactory;
 use Wikibase\Client\RecentChanges\RecentChangesDuplicateDetector;
 use Wikibase\EntityChange;
-use Wikimedia\Assert\Assert;
 use Wikimedia\Rdbms\LBFactory;
 
 /**
@@ -40,9 +39,19 @@
        private $LBFactory;
 
        /**
-        * @var int Batch size for database operations
+        * @var int Batch size for UpdateHtmlCacheJob
         */
-       private $dbBatchSize = 50;
+       private $purgeCacheBatchSize = 100;
+
+       /**
+        * @var int Batch size for InjectRCRecordsJob
+        */
+       private $rcBatchSize = 100;
+
+       /**
+        * @var int Batch size for RefreshLinksJobs
+        */
+       private $refreshLinksBatchSize = 5;
 
        /**
         * @var RecentChangesDuplicateDetector|null
@@ -78,17 +87,43 @@
        /**
         * @return int
         */
-       public function getDbBatchSize() {
-               return $this->dbBatchSize;
+       public function getPurgeCacheBatchSize() {
+               return $this->purgeCacheBatchSize;
        }
 
        /**
-        * @param int $dbBatchSize
+        * @param int $purgeCacheBatchSize
         */
-       public function setDbBatchSize( $dbBatchSize ) {
-               Assert::parameterType( 'integer', $dbBatchSize, 'dbBatchSize' );
+       public function setPurgeCacheBatchSize( $purgeCacheBatchSize ) {
+               $this->purgeCacheBatchSize = $purgeCacheBatchSize;
+       }
 
-               $this->dbBatchSize = $dbBatchSize;
+       /**
+        * @return int
+        */
+       public function getRefreshLinksBatchSize() {
+               return $this->refreshLinksBatchSize;
+       }
+
+       /**
+        * @param int $refreshLinksBatchSize
+        */
+       public function setRefreshLinksBatchSize( $refreshLinksBatchSize ) {
+               $this->refreshLinksBatchSize = $refreshLinksBatchSize;
+       }
+
+       /**
+        * @return int
+        */
+       public function getRecentChangesBatchSize() {
+               return $this->rcBatchSize;
+       }
+
+       /**
+        * @param int $rcBatchSize
+        */
+       public function setRecentChangesBatchSize( $rcBatchSize ) {
+               $this->rcBatchSize = $rcBatchSize;
        }
 
        private function incrementStats( $updateType, $delta ) {
@@ -133,7 +168,7 @@
                }
 
                $jobs = [];
-               $titleBatches = array_chunk( $titles, $this->dbBatchSize );
+               $titleBatches = array_chunk( $titles, 
$this->getPurgeCacheBatchSize() );
 
                /* @var Title[] $batch */
                foreach ( $titleBatches as $batch ) {
@@ -167,7 +202,7 @@
                }
 
                $jobs = [];
-               $titleBatches = array_chunk( $titles, $this->dbBatchSize );
+               $titleBatches = array_chunk( $titles, 
$this->getRefreshLinksBatchSize() );
 
                /* @var Title[] $batch */
                foreach ( $titleBatches as $batch ) {
@@ -220,7 +255,12 @@
                        return;
                }
 
-               $jobSpec = InjectRCRecordsJob::makeJobSpecification( $titles, 
$change, $rootJobParams );
+               $jobSpec = InjectRCRecordsJob::makeJobSpecification(
+                       $titles,
+                       $change,
+                       $rootJobParams,
+                       $this->getRecentChangesBatchSize()
+               );
 
                $this->jobQueueGroup->lazyPush( $jobSpec );
 
diff --git a/client/includes/WikibaseClient.php 
b/client/includes/WikibaseClient.php
index 4b14a38..74e5cb1 100644
--- a/client/includes/WikibaseClient.php
+++ b/client/includes/WikibaseClient.php
@@ -1156,7 +1156,9 @@
                        MediaWikiServices::getInstance()->getStatsdDataFactory()
                );
 
-               $pageUpdater->setDbBatchSize( $this->settings->getSetting( 
'wikiPageUpdaterDbBatchSize' ) );
+               $pageUpdater->setPurgeCacheBatchSize( 
$this->settings->getSetting( 'purgeCacheBatchSize' ) );
+               $pageUpdater->setRefreshLinksBatchSize( 
$this->settings->getSetting( 'refreshLinksBatchSize' ) );
+               $pageUpdater->setRecentChangesBatchSize( 
$this->settings->getSetting( 'recentChangesBatchSize' ) );
 
                $changeListTransformer = new ChangeRunCoalescer(
                        $this->getStore()->getEntityRevisionLookup(),
diff --git a/client/tests/phpunit/includes/Changes/InjectRCRecordsJobTest.php 
b/client/tests/phpunit/includes/Changes/InjectRCRecordsJobTest.php
index 1636500..cd117d8 100644
--- a/client/tests/phpunit/includes/Changes/InjectRCRecordsJobTest.php
+++ b/client/tests/phpunit/includes/Changes/InjectRCRecordsJobTest.php
@@ -493,7 +493,8 @@
                                21 => [ 0, 'Foo' ],
                                22 => [ 0, 'Bar' ],
                                23 => [ 0, 'Cuzz' ],
-                       ]
+                       ],
+                       'batchSize' => 2
                ];
 
                $job = new InjectRCRecordsJob(
@@ -505,7 +506,6 @@
                );
 
                $job->setTitleFactory( $this->getTitleFactoryMock() );
-               $job->setDbBatchSize( 2 );
 
                $job->run();
        }
diff --git a/docs/options.wiki b/docs/options.wiki
index 6cf9eb2..ddc3dec 100644
--- a/docs/options.wiki
+++ b/docs/options.wiki
@@ -110,4 +110,7 @@
 ;sendEchoNotification: If true, allows users on the client wiki to get a 
notification when a page they created is connected to a repo item. This 
requires the Echo extension.
 ;echoIcon: If <code>sendEchoNotification</code> is set to <code>true</code>, 
you can also provide what icon the user will see. The correct syntax is <code>[ 
'url' => '...' ]</code> or <code>[ 'path' => '...' ]</code> where 
<code>path</code> is relative to <code>$wgExtensionAssetsPath</code>. Defaults 
to <code>false</code> which means that there will be the default Echo icon.
 ;disabledUsageAspects: Array of usage aspects that should not be saved in the 
<code>wbc_entity_usage</code> table. This currently only supports aspect codes 
(like "T", "L" or "X"), but not full aspect keys (like "L.de").
-;wikiPageUpdaterDbBatchSize: Batch size in updating page table when applying 
changes in the client, default is 50.
+;wikiPageUpdaterDbBatchSize: DEPRECATED. If set, acts as a default for 
purgeCacheBatchSize, recentChangesBatchSize, and refreshLinksBatchSize.
+;purgeCacheBatchSize: Number of pages to process in each UpdateHtmlCacheJob, 
defaults to 100.
+;refreshLinksBatchSize: Number of pages to process in each RefreshLinksJob, 
defaults to 5.
+;recentChangesBatchSize: Batch size for database operations in 
InjectRCRecordsJob, defaults to 100.

-- 
To view, visit https://gerrit.wikimedia.org/r/377046
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic095e2eba985ea3a6e51645d0be58589b326f218
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <daniel.kinz...@wikimedia.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to