jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/405739 )
Change subject: Allow continuing Wikibase entity dumps ...................................................................... Allow continuing Wikibase entity dumps This only works when SqlEntityIdPager is used internally, but that IMO also is the only place this makes sense. Bug: T177550 Change-Id: I05856c8022969b427a4b8045b54afb15011ff6be --- M repo/includes/Dumpers/DumpGenerator.php M repo/maintenance/dumpEntities.php A repo/tests/phpunit/data/maintenance/dumpJson-limit2-log.txt A repo/tests/phpunit/data/maintenance/dumpJson-limit2-out.txt M repo/tests/phpunit/maintenance/dumpJsonTest.php 5 files changed, 63 insertions(+), 12 deletions(-) Approvals: Ladsgroup: Looks good to me, approved jenkins-bot: Verified diff --git a/repo/includes/Dumpers/DumpGenerator.php b/repo/includes/Dumpers/DumpGenerator.php index 8afc47b..73049ff 100644 --- a/repo/includes/Dumpers/DumpGenerator.php +++ b/repo/includes/Dumpers/DumpGenerator.php @@ -13,6 +13,7 @@ use Wikibase\Lib\Reporting\RethrowingExceptionHandler; use Wikibase\Lib\Store\StorageException; use Wikibase\DataModel\Services\EntityId\EntityIdPager; +use Wikibase\Repo\Store\Sql\SqlEntityIdPager; /** * DumpGenerator generates a dump of a given set of entities, excluding @@ -243,7 +244,14 @@ // Iterate over batches of IDs, maintaining the current position of the pager in the $position variable. while ( true ) { - $ids = $idPager->fetchIds( $this->batchSize ); + if ( $this->limit && ( $dumpCount + $this->batchSize ) > $this->limit ) { + // Try not to overrun $limit in order to make sure pager's position can be used for continuing. + $limit = $this->limit - $dumpCount; + } else { + $limit = $this->batchSize; + } + + $ids = $idPager->fetchIds( $limit ); if ( !$ids ) { break; } @@ -253,6 +261,13 @@ $this->progressReporter->reportMessage( 'Processed ' . $dumpCount . ' entities.' ); if ( $this->limit && $dumpCount >= $this->limit ) { + $this->progressReporter->reportMessage( 'Reached entity dump limit of ' . $this->limit . '.' ); + + if ( $idPager instanceof SqlEntityIdPager ) { + // This message is possibly being parsed for continuation purposes, thus avoid changing it. + $this->progressReporter->reportMessage( 'Last SqlEntityIdPager position: ' . $idPager->getPosition() . '.' ); + } + break; } } diff --git a/repo/maintenance/dumpEntities.php b/repo/maintenance/dumpEntities.php index 5f887f6..d8dd804 100644 --- a/repo/maintenance/dumpEntities.php +++ b/repo/maintenance/dumpEntities.php @@ -52,6 +52,7 @@ $this->addOption( 'quiet', "Disable progress reporting", false, false ); $this->addOption( 'limit', "Limit how many entities are dumped.", false, true ); $this->addOption( 'no-cache', "If this is set, don't try to read from an EntityRevisionCache.", false, false ); + $this->addOption( 'continue', 'Continue parameter for SqlEntityIdPager. Not compatible with --list-file.', false, true ); } public function setDumpEntitiesServices( SqlEntityIdPagerFactory $sqlEntityIdPagerFactory ) { @@ -230,7 +231,14 @@ * @return SqlEntityIdPager */ private function makeIdQueryStream( $entityType ) { - return $this->sqlEntityIdPagerFactory->newSqlEntityIdPager( $entityType, $this->getRedirectMode() ); + $sqlEntityIdPager = $this->sqlEntityIdPagerFactory->newSqlEntityIdPager( $entityType, $this->getRedirectMode() ); + + $continue = $this->getOption( 'continue', null ); + if ( $continue ) { + $sqlEntityIdPager->setPosition( intval( $continue ) ); + } + + return $sqlEntityIdPager; } /** diff --git a/repo/tests/phpunit/data/maintenance/dumpJson-limit2-log.txt b/repo/tests/phpunit/data/maintenance/dumpJson-limit2-log.txt new file mode 100644 index 0000000..d5a768c --- /dev/null +++ b/repo/tests/phpunit/data/maintenance/dumpJson-limit2-log.txt @@ -0,0 +1,3 @@ +Dumping shard 0/1 +Processed 2 entities. +Reached entity dump limit of 2. diff --git a/repo/tests/phpunit/data/maintenance/dumpJson-limit2-out.txt b/repo/tests/phpunit/data/maintenance/dumpJson-limit2-out.txt new file mode 100644 index 0000000..5e8a480 --- /dev/null +++ b/repo/tests/phpunit/data/maintenance/dumpJson-limit2-out.txt @@ -0,0 +1,4 @@ +[ +{"type":"item","id":"Q1","labels":{},"descriptions":{},"aliases":{},"claims":{},"sitelinks":{}}, +{"type":"property","datatype":"string","id":"P1","labels":{},"descriptions":{},"aliases":{},"claims":{}} +] diff --git a/repo/tests/phpunit/maintenance/dumpJsonTest.php b/repo/tests/phpunit/maintenance/dumpJsonTest.php index 7530ed1..a9f55a7 100644 --- a/repo/tests/phpunit/maintenance/dumpJsonTest.php +++ b/repo/tests/phpunit/maintenance/dumpJsonTest.php @@ -45,7 +45,7 @@ */ class DumpJsonTest extends MediaWikiTestCase { - public function testScript() { + private function getDumpJson() { $dumpScript = new DumpJson(); $mockRepo = new MockRepository(); @@ -141,21 +141,42 @@ $serializerFactory->newEntitySerializer() ); + return $dumpScript; + } + + public function dumpParameterProvider() { + return [ + 'dump everything' => [ + [], + __DIR__ . '/../data/maintenance/dumpJson-log.txt', + __DIR__ . '/../data/maintenance/dumpJson-out.txt', + ], + 'dump with limit 2' => [ + [ + 'limit' => 2, + ], + __DIR__ . '/../data/maintenance/dumpJson-limit2-log.txt', + __DIR__ . '/../data/maintenance/dumpJson-limit2-out.txt', + ] + ]; + } + + /** + * @dataProvider dumpParameterProvider + */ + public function testScript( array $opts, $expectedLogFile, $expectedOutFile ) { + $dumpScript = $this->getDumpJson(); + $logFileName = tempnam( sys_get_temp_dir(), "Wikibase-DumpJsonTest" ); $outFileName = tempnam( sys_get_temp_dir(), "Wikibase-DumpJsonTest" ); - $dumpScript->loadParamsAndArgs( - null, - [ - 'log' => $logFileName, - 'output' => $outFileName, - ] - ); + $opts = $opts + [ 'log' => $logFileName, 'output' => $outFileName ]; + $dumpScript->loadParamsAndArgs( null, $opts ); $dumpScript->execute(); - $expectedLog = file_get_contents( __DIR__ . '/../data/maintenance/dumpJson-log.txt' ); - $expectedOut = file_get_contents( __DIR__ . '/../data/maintenance/dumpJson-out.txt' ); + $expectedLog = file_get_contents( $expectedLogFile ); + $expectedOut = file_get_contents( $expectedOutFile ); $this->assertEquals( $this->fixLineEndings( $expectedLog ), -- To view, visit https://gerrit.wikimedia.org/r/405739 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I05856c8022969b427a4b8045b54afb15011ff6be Gerrit-PatchSet: 2 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Hoo man <h...@online.de> Gerrit-Reviewer: Addshore <addshorew...@gmail.com> Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org> Gerrit-Reviewer: Daniel Kinzler <daniel.kinz...@wikimedia.de> Gerrit-Reviewer: Hoo man <h...@online.de> Gerrit-Reviewer: Ladsgroup <ladsgr...@gmail.com> Gerrit-Reviewer: Lucas Werkmeister (WMDE) <lucas.werkmeis...@wikimedia.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits