jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/405739 )

Change subject: Allow continuing Wikibase entity dumps
......................................................................


Allow continuing Wikibase entity dumps

Continuing a dump only works when SqlEntityIdPager is used internally,
but IMO that is also the only case where it makes sense.

Bug: T177550
Change-Id: I05856c8022969b427a4b8045b54afb15011ff6be
---
M repo/includes/Dumpers/DumpGenerator.php
M repo/maintenance/dumpEntities.php
A repo/tests/phpunit/data/maintenance/dumpJson-limit2-log.txt
A repo/tests/phpunit/data/maintenance/dumpJson-limit2-out.txt
M repo/tests/phpunit/maintenance/dumpJsonTest.php
5 files changed, 63 insertions(+), 12 deletions(-)

Approvals:
  Ladsgroup: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/repo/includes/Dumpers/DumpGenerator.php 
b/repo/includes/Dumpers/DumpGenerator.php
index 8afc47b..73049ff 100644
--- a/repo/includes/Dumpers/DumpGenerator.php
+++ b/repo/includes/Dumpers/DumpGenerator.php
@@ -13,6 +13,7 @@
 use Wikibase\Lib\Reporting\RethrowingExceptionHandler;
 use Wikibase\Lib\Store\StorageException;
 use Wikibase\DataModel\Services\EntityId\EntityIdPager;
+use Wikibase\Repo\Store\Sql\SqlEntityIdPager;
 
 /**
  * DumpGenerator generates a dump of a given set of entities, excluding
@@ -243,7 +244,14 @@
 
                // Iterate over batches of IDs, maintaining the current 
position of the pager in the $position variable.
                while ( true ) {
-                       $ids = $idPager->fetchIds( $this->batchSize );
+                       if ( $this->limit && ( $dumpCount + $this->batchSize ) 
> $this->limit ) {
+                               // Try not to overrun $limit in order to make 
sure pager's position can be used for continuing.
+                               $limit = $this->limit - $dumpCount;
+                       } else {
+                               $limit = $this->batchSize;
+                       }
+
+                       $ids = $idPager->fetchIds( $limit );
                        if ( !$ids ) {
                                break;
                        }
@@ -253,6 +261,13 @@
                        $this->progressReporter->reportMessage( 'Processed ' . 
$dumpCount . ' entities.' );
 
                        if ( $this->limit && $dumpCount >= $this->limit ) {
+                               $this->progressReporter->reportMessage( 
'Reached entity dump limit of ' . $this->limit . '.' );
+
+                               if ( $idPager instanceof SqlEntityIdPager ) {
+                                       // This message is possibly being 
parsed for continuation purposes, thus avoid changing it.
+                                       $this->progressReporter->reportMessage( 
'Last SqlEntityIdPager position: ' . $idPager->getPosition() .  '.' );
+                               }
+
                                break;
                        }
                }
diff --git a/repo/maintenance/dumpEntities.php 
b/repo/maintenance/dumpEntities.php
index 5f887f6..d8dd804 100644
--- a/repo/maintenance/dumpEntities.php
+++ b/repo/maintenance/dumpEntities.php
@@ -52,6 +52,7 @@
                $this->addOption( 'quiet', "Disable progress reporting", false, 
false );
                $this->addOption( 'limit', "Limit how many entities are 
dumped.", false, true );
                $this->addOption( 'no-cache', "If this is set, don't try to 
read from an EntityRevisionCache.", false, false );
+               $this->addOption( 'continue', 'Continue parameter for 
SqlEntityIdPager. Not compatible with --list-file.', false, true );
        }
 
        public function setDumpEntitiesServices( SqlEntityIdPagerFactory 
$sqlEntityIdPagerFactory ) {
@@ -230,7 +231,14 @@
         * @return SqlEntityIdPager
         */
        private function makeIdQueryStream( $entityType ) {
-               return $this->sqlEntityIdPagerFactory->newSqlEntityIdPager( 
$entityType, $this->getRedirectMode() );
+               $sqlEntityIdPager = 
$this->sqlEntityIdPagerFactory->newSqlEntityIdPager( $entityType, 
$this->getRedirectMode() );
+
+               $continue = $this->getOption( 'continue', null );
+               if ( $continue ) {
+                       $sqlEntityIdPager->setPosition( intval( $continue ) );
+               }
+
+               return $sqlEntityIdPager;
        }
 
        /**
diff --git a/repo/tests/phpunit/data/maintenance/dumpJson-limit2-log.txt 
b/repo/tests/phpunit/data/maintenance/dumpJson-limit2-log.txt
new file mode 100644
index 0000000..d5a768c
--- /dev/null
+++ b/repo/tests/phpunit/data/maintenance/dumpJson-limit2-log.txt
@@ -0,0 +1,3 @@
+Dumping shard 0/1
+Processed 2 entities.
+Reached entity dump limit of 2.
diff --git a/repo/tests/phpunit/data/maintenance/dumpJson-limit2-out.txt 
b/repo/tests/phpunit/data/maintenance/dumpJson-limit2-out.txt
new file mode 100644
index 0000000..5e8a480
--- /dev/null
+++ b/repo/tests/phpunit/data/maintenance/dumpJson-limit2-out.txt
@@ -0,0 +1,4 @@
+[
+{"type":"item","id":"Q1","labels":{},"descriptions":{},"aliases":{},"claims":{},"sitelinks":{}},
+{"type":"property","datatype":"string","id":"P1","labels":{},"descriptions":{},"aliases":{},"claims":{}}
+]
diff --git a/repo/tests/phpunit/maintenance/dumpJsonTest.php 
b/repo/tests/phpunit/maintenance/dumpJsonTest.php
index 7530ed1..a9f55a7 100644
--- a/repo/tests/phpunit/maintenance/dumpJsonTest.php
+++ b/repo/tests/phpunit/maintenance/dumpJsonTest.php
@@ -45,7 +45,7 @@
  */
 class DumpJsonTest extends MediaWikiTestCase {
 
-       public function testScript() {
+       private function getDumpJson() {
                $dumpScript = new DumpJson();
 
                $mockRepo = new MockRepository();
@@ -141,21 +141,42 @@
                        $serializerFactory->newEntitySerializer()
                );
 
+               return $dumpScript;
+       }
+
+       public function dumpParameterProvider() {
+               return [
+                       'dump everything' => [
+                               [],
+                               __DIR__ . 
'/../data/maintenance/dumpJson-log.txt',
+                               __DIR__ . 
'/../data/maintenance/dumpJson-out.txt',
+                       ],
+                       'dump with limit 2' => [
+                               [
+                                       'limit' => 2,
+                               ],
+                               __DIR__ . 
'/../data/maintenance/dumpJson-limit2-log.txt',
+                               __DIR__ . 
'/../data/maintenance/dumpJson-limit2-out.txt',
+                       ]
+               ];
+       }
+
+       /**
+        * @dataProvider dumpParameterProvider
+        */
+       public function testScript( array $opts, $expectedLogFile, 
$expectedOutFile ) {
+               $dumpScript = $this->getDumpJson();
+
                $logFileName = tempnam( sys_get_temp_dir(), 
"Wikibase-DumpJsonTest" );
                $outFileName = tempnam( sys_get_temp_dir(), 
"Wikibase-DumpJsonTest" );
 
-               $dumpScript->loadParamsAndArgs(
-                       null,
-                       [
-                               'log' => $logFileName,
-                               'output' => $outFileName,
-                       ]
-               );
+               $opts = $opts + [ 'log' => $logFileName, 'output' => 
$outFileName ];
+               $dumpScript->loadParamsAndArgs( null, $opts );
 
                $dumpScript->execute();
 
-               $expectedLog = file_get_contents( __DIR__ . 
'/../data/maintenance/dumpJson-log.txt' );
-               $expectedOut = file_get_contents( __DIR__ . 
'/../data/maintenance/dumpJson-out.txt' );
+               $expectedLog = file_get_contents( $expectedLogFile );
+               $expectedOut = file_get_contents( $expectedOutFile );
 
                $this->assertEquals(
                        $this->fixLineEndings( $expectedLog ),

-- 
To view, visit https://gerrit.wikimedia.org/r/405739
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I05856c8022969b427a4b8045b54afb15011ff6be
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Hoo man <h...@online.de>
Gerrit-Reviewer: Addshore <addshorew...@gmail.com>
Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: Daniel Kinzler <daniel.kinz...@wikimedia.de>
Gerrit-Reviewer: Hoo man <h...@online.de>
Gerrit-Reviewer: Ladsgroup <ladsgr...@gmail.com>
Gerrit-Reviewer: Lucas Werkmeister (WMDE) <lucas.werkmeis...@wikimedia.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to