Hoo man has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/125725

Change subject: Implement a rebuildItemsPerSite maintenance script
......................................................................

Implement a rebuildItemsPerSite maintenance script

Bug: 59870
Change-Id: Idcf6892873c10768f567e7bb717294d023c192b7
---
A repo/includes/store/sql/ItemsPerSiteBuilder.php
A repo/maintenance/rebuildItemsPerSite.php
A repo/tests/phpunit/includes/store/sql/ItemsPerSiteBuilderTest.php
3 files changed, 398 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase 
refs/changes/25/125725/1

diff --git a/repo/includes/store/sql/ItemsPerSiteBuilder.php 
b/repo/includes/store/sql/ItemsPerSiteBuilder.php
new file mode 100644
index 0000000..6df095f
--- /dev/null
+++ b/repo/includes/store/sql/ItemsPerSiteBuilder.php
@@ -0,0 +1,169 @@
+<?php
+namespace Wikibase;
+
+use MessageReporter;
+use Wikibase\SiteLinkTable;
+use Wikibase\EntityIdPager;
+use Wikibase\EntityLookup;
+
+/**
+ * Utility class for rebuilding the wb_items_per_site table.
+ *
+ * @since 0.5
+ *
+ * @license GNU GPL v2+
+ * @author Marius Hoch < h...@online.de >
+ */
+class ItemsPerSiteBuilder {
+
+       /**
+        * Table the site links of each processed item are saved to
+        * (via saveLinksOfItem()).
+        *
+        * @since 0.5
+        *
+        * @var SiteLinkTable $siteLinkTable
+        */
+       protected $siteLinkTable;
+
+       /**
+        * Lookup used to load the entity belonging to each ID handed out
+        * by the pager.
+        *
+        * @since 0.5
+        *
+        * @var EntityLookup $entityLookup
+        */
+       protected $entityLookup;
+
+       /**
+        * Optional progress reporter. Stays unset until setReporter() is called;
+        * report() does nothing while it is unset.
+        *
+        * @since 0.5
+        *
+        * @var MessageReporter $reporter
+        */
+       protected $reporter;
+
+       /**
+        * The batch size, giving the number of entity IDs requested from the
+        * EntityIdPager per batch.
+        * NOTE(review): whether one batch maps to one database transaction
+        * cannot be told from this class -- confirm.
+        *
+        * @since 0.5
+        *
+        * @var int
+        */
+       protected $batchSize = 100;
+
+       /**
+        * Wires up the collaborators needed for a rebuild.
+        *
+        * @param SiteLinkTable $siteLinkTable Table the site links get saved to
+        * @param EntityLookup $entityLookup Lookup used to load the entities
+        */
+       public function __construct( SiteLinkTable $siteLinkTable, EntityLookup $entityLookup ) {
+               $this->entityLookup = $entityLookup;
+               $this->siteLinkTable = $siteLinkTable;
+       }
+
+       /**
+        * Sets how many entity IDs are requested from the pager per batch
+        * during rebuild(). The default of 100 applies when this is never called.
+        *
+        * @since 0.5
+        *
+        * @param int $batchSize
+        */
+       public function setBatchSize( $batchSize ) {
+               $this->batchSize = $batchSize;
+       }
+
+       /**
+        * Sets the reporter to use for reporting progress. As long as no
+        * reporter has been set, progress messages are silently dropped.
+        *
+        * @param \MessageReporter $reporter
+        */
+       public function setReporter( \MessageReporter $reporter ) {
+               $this->reporter = $reporter;
+       }
+
+       /**
+        * Rebuilds the site links of all items the given pager hands out.
+        *
+        * IDs are fetched from the pager in batches of $this->batchSize until it
+        * returns a falsy (e.g. empty) result; progress is reported after every batch.
+        *
+        * @since 0.5
+        *
+        * @param EntityIdPager $entityIdPager
+        *
+        * @return bool Always true
+        */
+       public function rebuild( EntityIdPager $entityIdPager ) {
+               $this->report( 'Start rebuild...' );
+
+               // Iterate over batches of IDs. No position is passed in, so the pager
+               // is expected to advance on its own between calls to fetchIds().
+               $processed = 0;
+               while ( $ids = $entityIdPager->fetchIds( $this->batchSize ) ) {
+                       $processed += $this->rebuildSiteLinks( $ids );
+
+                       $this->report( 'Processed ' . $processed . ' entities.' );
+               }
+
+               $this->report( 'Rebuild done.' );
+
+               return true;
+       }
+
+       /**
+        * Rebuilds the site links for the items with the given IDs.
+        *
+        * IDs of non-item entities and IDs for which no entity can be loaded
+        * are skipped and not counted.
+        *
+        * @since 0.5
+        *
+        * @param EntityId[] $entityIds
+        *
+        * @return int Number of items processed (including those whose save failed)
+        */
+       private function rebuildSiteLinks( array $entityIds ) {
+               $i = 0;
+               foreach ( $entityIds as $entityId ) {
+                       /* @var $entityId EntityId */
+                       // Must be "!==": "!$a === $b" negates $a first and compares a boolean
+                       // to the type string, which never matches, so nothing would be skipped.
+                       if ( $entityId->getEntityType() !== Item::ENTITY_TYPE ) {
+                               // Just in case someone is using an EntityIdPager which doesn't filter non-Items
+                               continue;
+                       }
+                       $item = $this->entityLookup->getEntity( $entityId );
+
+                       if ( !$item ) {
+                               // Nothing could be loaded for this ID, so there is nothing to save
+                               continue;
+                       }
+
+                       $ok = $this->siteLinkTable->saveLinksOfItem( $item );
+                       if ( !$ok ) {
+                               $this->report( 'Savings sitelinks for Item ' . $item->getId()->getSerialization() . ' failed' );
+                       }
+
+                       $i++;
+               }
+               // Wait for the slaves (just in case we eg. hit a range of ids which need a lot of writes)
+               $this->waitForSlaves();
+
+               return $i;
+       }
+
+       /**
+        * Blocks until the maximum lag reported by the load balancer is below
+        * 2 seconds, re-checking every 5 seconds and reporting each wait.
+        *
+        * @todo: this should be in the Database class.
+        * @todo: thresholds should be configurable
+        *
+        * @author Tim Starling (stolen from recompressTracked.php)
+        */
+       protected function waitForSlaves() {
+               $loadBalancer = wfGetLB(); //TODO: allow foreign DB, get from $this->table
+
+               for ( ; ; ) {
+                       // getMaxLag() yields the lagged host first and its lag second
+                       list( $host, $maxLag ) = $loadBalancer->getMaxLag();
+                       if ( $maxLag < 2 ) {
+                               return;
+                       }
+
+                       $this->report( "Slaves are lagged by $maxLag seconds, sleeping..." );
+                       sleep( 5 );
+                       $this->report( "Resuming..." );
+               }
+       }
+
+       /**
+        * Forwards a message to the reporter if one has been set;
+        * does nothing otherwise.
+        *
+        * @since 0.5
+        *
+        * @param string $msg
+        */
+       protected function report( $msg ) {
+               if ( !$this->reporter ) {
+                       return;
+               }
+
+               $this->reporter->reportMessage( $msg );
+       }
+
+}
diff --git a/repo/maintenance/rebuildItemsPerSite.php 
b/repo/maintenance/rebuildItemsPerSite.php
new file mode 100644
index 0000000..689d812
--- /dev/null
+++ b/repo/maintenance/rebuildItemsPerSite.php
@@ -0,0 +1,90 @@
+<?php
+
+namespace Wikibase;
+
+use Wikibase\Repo\WikibaseRepo;
+use LoggedUpdateMaintenance;
+
+// Locate the MediaWiki installation: prefer the MW_INSTALL_PATH environment
+// variable and fall back to a path relative to this file.
+$basePath = getenv( 'MW_INSTALL_PATH' );
+if ( $basePath === false ) {
+       $basePath = __DIR__ . '/../../../..';
+}
+
+require_once $basePath . '/maintenance/Maintenance.php';
+
+/**
+ * Maintenance script for rebuilding the items_per_site table.
+ *
+ * @since 0.5
+ *
+ * @license GNU GPL v2+
+ * @author Marius Hoch < h...@online.de >
+ */
+class RebuildItemsPerSite extends LoggedUpdateMaintenance {
+
+       /**
+        * Sets the script description and registers the "batch-size" option.
+        */
+       public function __construct() {
+               parent::__construct();
+
+               $this->mDescription = 'Rebuild the items_per_site table';
+
+               $this->addOption(
+                       'batch-size',
+                       'Number of rows to update per batch (100 by default)',
+                       false,
+                       true
+               );
+       }
+
+       /**
+        * Runs an ItemsPerSiteBuilder over the IDs handed out by an
+        * EntityPerPageIdPager restricted to 'item', writing to the
+        * wb_items_per_site table and reporting progress via report().
+        *
+        * Terminates the script with a non-zero exit status when Wikibase
+        * is not enabled (WB_VERSION undefined).
+        *
+        * @see LoggedUpdateMaintenance::doDBUpdates
+        *
+        * @return bool Always true
+        */
+       public function doDBUpdates() {
+               if ( !defined( 'WB_VERSION' ) ) {
+                       $this->output( "You need to have Wikibase enabled in order to use this maintenance script!\n\n" );
+                       // A bare "exit;" would end with status 0 and make the failure look like success.
+                       exit( 1 );
+               }
+
+               $batchSize = intval( $this->getOption( 'batch-size', 100 ) );
+
+               // Route the builder's progress messages to this script's report() method
+               $reporter = new \ObservableMessageReporter();
+               $reporter->registerReporterCallback(
+                       array( $this, 'report' )
+               );
+
+               $siteLinkTable = new SiteLinkTable( 'wb_items_per_site', false );
+               // Use an uncached EntityLookup here to avoid memory leaks
+               $entityLookup = WikibaseRepo::getDefaultInstance()->getEntityLookup( 'uncached' );
+               $builder = new ItemsPerSiteBuilder(
+                       $siteLinkTable,
+                       $entityLookup
+               );
+
+               $builder->setReporter( $reporter );
+
+               $builder->setBatchSize( $batchSize );
+
+               $entityPerPage = new EntityPerPageTable();
+               $stream = new EntityPerPageIdPager( $entityPerPage, 'item' );
+               $builder->rebuild( $stream );
+
+               return true;
+       }
+
+       /**
+        * Outputs a message via the output() method, followed by a newline.
+        * Registered as the callback of the message reporter in doDBUpdates().
+        *
+        * @since 0.5
+        *
+        * @param string $msg
+        */
+       public function report( $msg ) {
+               $this->output( $msg . "\n" );
+       }
+
+       /**
+        * Returns the key identifying this update; the fully qualified class
+        * name is used.
+        * NOTE(review): presumably LoggedUpdateMaintenance uses this key to
+        * remember that the update already ran -- confirm.
+        *
+        * @see LoggedUpdateMaintenance::getUpdateKey
+        *
+        * @return string
+        */
+       public function getUpdateKey() {
+               return 'Wikibase\RebuildItemsPerSite';
+       }
+
+}
+
+// Name the class implementing this script, then hand over to the maintenance runner.
+// NOTE(review): presumably the runner only executes when this file is the entry point -- confirm.
+$maintClass = 'Wikibase\RebuildItemsPerSite';
+require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/repo/tests/phpunit/includes/store/sql/ItemsPerSiteBuilderTest.php 
b/repo/tests/phpunit/includes/store/sql/ItemsPerSiteBuilderTest.php
new file mode 100644
index 0000000..4311b5d
--- /dev/null
+++ b/repo/tests/phpunit/includes/store/sql/ItemsPerSiteBuilderTest.php
@@ -0,0 +1,139 @@
+<?php
+
+namespace Wikibase\Test;
+
+use Wikibase\DataModel\Entity\Item;
+use Wikibase\DataModel\Entity\ItemId;
+use Wikibase\EntityIdPager;
+use Wikibase\ItemsPerSiteBuilder;
+use Wikibase\EntityLookup;
+use Wikibase\SiteLinkTable;
+
+/**
+ * @covers Wikibase\ItemsPerSiteBuilder
+ *
+ * @license GPL 2+
+ *
+ * @group Wikibase
+ * @group WikibaseStore
+ * @group WikibaseRepo
+ *
+ * @author Marius Hoch < h...@online.de >
+ */
+class ItemsPerSiteBuilderTest extends \MediaWikiTestCase {
+       /**
+        * Batch size used throughout the test; equals the number of IDs in
+        * each non-empty batch returned by the mocked pager.
+        *
+        * @return int
+        */
+       private function getBatchSize() {
+               return 5;
+       }
+
+       /**
+        * Returns a fresh ItemId instance for Q1234, the only ID used by this test.
+        *
+        * @return ItemId
+        */
+       private function getTestItemId() {
+               return new ItemId( 'Q1234' );
+       }
+
+       /**
+        * Returns the test item (an empty item carrying the test item ID).
+        * The instance is created on first use and shared by all later calls.
+        *
+        * @return Item
+        */
+       private function getTestItem() {
+               static $item = null;
+
+               if ( $item ) {
+                       return $item;
+               }
+
+               $item = Item::newEmpty();
+               $item->setId( $this->getTestItemId() );
+
+               return $item;
+       }
+
+       /**
+        * Builds a SiteLinkTable mock expecting saveLinksOfItem() to be called
+        * exactly 10 times with the test item (two batches of five IDs), each
+        * call reporting success.
+        *
+        * @return SiteLinkTable
+        */
+       private function getSiteLinkTableMock() {
+               $mock = $this->getMockBuilder( '\Wikibase\SiteLinkTable' )
+                       ->disableOriginalConstructor()
+                       ->getMock();
+
+               $mock->expects( $this->exactly( 10 ) )
+                       ->method( 'saveLinksOfItem' )
+                       ->with( $this->equalTo( $this->getTestItem() ) )
+                       ->will( $this->returnValue( true ) );
+
+               return $mock;
+       }
+
+       /**
+        * Builds an EntityLookup mock expecting getEntity() to be called exactly
+        * 10 times with the test item ID, each call returning the test item.
+        *
+        * @return EntityLookup
+        */
+       private function getEntityLookupMock() {
+               $mock = $this->getMockBuilder( '\Wikibase\EntityLookup' )
+                       ->disableOriginalConstructor()
+                       ->getMock();
+
+               $testItem = $this->getTestItem();
+               $mock->expects( $this->exactly( 10 ) )
+                       ->method( 'getEntity' )
+                       ->with( $this->equalTo( $this->getTestItemId() ) )
+                       ->will( $this->returnValue( $testItem ) );
+
+               return $mock;
+       }
+
+       /**
+        * Creates the builder under test, backed by the mocked site link table
+        * and the mocked entity lookup.
+        *
+        * @return ItemsPerSiteBuilder
+        */
+       private function getItemsPerSiteBuilder() {
+               $siteLinkTable = $this->getSiteLinkTableMock();
+               $entityLookup = $this->getEntityLookupMock();
+
+               return new ItemsPerSiteBuilder( $siteLinkTable, $entityLookup );
+       }
+
+       /**
+        * Builds an EntityIdPager mock whose fetchIds() returns two batches of
+        * five test item IDs followed by an empty batch, and which expects to be
+        * asked for the configured batch size on every call.
+        *
+        * @return EntityIdPager
+        */
+       private function getEntityIdPager() {
+               $entityIdPager = $this->getMock( 'Wikibase\EntityIdPager' );
+
+               $itemIds = array();
+               for ( $n = 0; $n < 5; $n++ ) {
+                       $itemIds[] = $this->getTestItemId();
+               }
+
+               // Results of the consecutive fetchIds() calls, in call order
+               $batches = array( $itemIds, $itemIds, array() );
+
+               foreach ( $batches as $callIndex => $batch ) {
+                       $entityIdPager->expects( $this->at( $callIndex ) )
+                               ->method( 'fetchIds' )
+                               ->with( $this->equalTo( $this->getBatchSize() ) )
+                               ->will( $this->returnValue( $batch ) );
+               }
+
+               return $entityIdPager;
+       }
+
+       /**
+        * Runs a rebuild over the mocked pager (two batches of five IDs, then an
+        * empty batch) and checks that rebuild() reports success.
+        */
+       public function testRebuild() {
+               $itemsPerSiteBuilder = $this->getItemsPerSiteBuilder();
+               $itemsPerSiteBuilder->setBatchSize( $this->getBatchSize() );
+
+               $entityIdPager = $this->getEntityIdPager();
+               $result = $itemsPerSiteBuilder->rebuild( $entityIdPager );
+
+               // The various mocks already verify they get called correctly;
+               // on top of that, make sure the rebuild signals completion.
+               $this->assertTrue( $result );
+       }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/125725
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Idcf6892873c10768f567e7bb717294d023c192b7
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Hoo man <h...@online.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to