Hoo man has uploaded a new change for review. https://gerrit.wikimedia.org/r/125725
Change subject: Implement a rebuildItemsPerSite maintenance script
......................................................................

Implement a rebuildItemsPerSite maintenance script

Bug: 59870
Change-Id: Idcf6892873c10768f567e7bb717294d023c192b7
---
A repo/includes/store/sql/ItemsPerSiteBuilder.php
A repo/maintenance/rebuildItemsPerSite.php
A repo/tests/phpunit/includes/store/sql/ItemsPerSiteBuilderTest.php
3 files changed, 398 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/25/125725/1

diff --git a/repo/includes/store/sql/ItemsPerSiteBuilder.php b/repo/includes/store/sql/ItemsPerSiteBuilder.php
new file mode 100644
index 0000000..6df095f
--- /dev/null
+++ b/repo/includes/store/sql/ItemsPerSiteBuilder.php
@@ -0,0 +1,169 @@
+<?php
+namespace Wikibase;
+
+use MessageReporter;
+use Wikibase\SiteLinkTable;
+use Wikibase\EntityIdPager;
+use Wikibase\EntityLookup;
+
+/**
+ * Utility class for rebuilding the wb_items_per_site table.
+ *
+ * @since 0.5
+ *
+ * @license GNU GPL v2+
+ * @author Marius Hoch < h...@online.de >
+ */
+class ItemsPerSiteBuilder {
+
+	/**
+	 * @since 0.5
+	 *
+	 * @var SiteLinkTable $siteLinkTable
+	 */
+	protected $siteLinkTable;
+
+	/**
+	 * @since 0.5
+	 *
+	 * @var EntityLookup $entityLookup
+	 */
+	protected $entityLookup;
+
+	/**
+	 * @since 0.5
+	 *
+	 * @var MessageReporter $reporter
+	 */
+	protected $reporter;
+
+	/**
+	 * The batch size, i.e. the number of entity IDs requested from the pager per fetchIds() call.
+	 *
+	 * @since 0.5
+	 *
+	 * @var int
+	 */
+	protected $batchSize = 100;
+
+	/**
+	 * @param SiteLinkTable $siteLinkTable
+	 * @param EntityLookup $entityLookup
+	 */
+	public function __construct( SiteLinkTable $siteLinkTable, EntityLookup $entityLookup ) {
+		$this->siteLinkTable = $siteLinkTable;
+		$this->entityLookup = $entityLookup;
+	}
+
+	/**
+	 * @since 0.5
+	 *
+	 * @param int $batchSize
+	 */
+	public function setBatchSize( $batchSize ) {
+		$this->batchSize = $batchSize;
+	}
+
+	/**
+	 * Sets the reporter to use for reporting progress.
+	 *
+	 * @param \MessageReporter $reporter
+	 */
+	public function setReporter( \MessageReporter $reporter ) {
+		$this->reporter = $reporter;
+	}
+
+	/**
+	 * Rebuilds the site links for every batch of IDs the pager yields; always returns true.
+	 * @since 0.5
+	 * @param EntityIdPager $entityIdPager
+	 */
+	public function rebuild( EntityIdPager $entityIdPager ) {
+		$this->report( 'Start rebuild...' );
+
+		// Fetch IDs batch by batch until the pager returns an empty (falsy) result.
+		$i = 0;
+		while ( $ids = $entityIdPager->fetchIds( $this->batchSize ) ) {
+			$i = $i + $this->rebuildSiteLinks( $ids );
+
+			$this->report( 'Processed ' . $i . ' entities.' );
+		}
+
+		$this->report( 'Rebuild done.' );
+
+		return true;
+	}
+
+	/**
+	 * Saves the site links of each given Item, skipping non-Item IDs and IDs the lookup returns nothing for.
+	 *
+	 * @since 0.5
+	 *
+	 * @param EntityId[] $entityIds
+	 *
+	 * @return int Number of items for which saving was attempted (failed saves are counted, too)
+	 */
+	private function rebuildSiteLinks( array $entityIds ) {
+		$i = 0;
+		foreach ( $entityIds as $entityId ) {
+			/* @var $entityId EntityId */
+			if ( $entityId->getEntityType() !== Item::ENTITY_TYPE ) {
+				// Just in case someone is using a EntityIdPager which doesn't filter non-Items
+				continue;
+			}
+			$item = $this->entityLookup->getEntity( $entityId );
+
+			if ( !$item ) {
+				continue;
+			}
+
+			$ok = $this->siteLinkTable->saveLinksOfItem( $item );
+			if ( !$ok ) {
+				$this->report( 'Savings sitelinks for Item ' . $item->getId()->getSerialization() . ' failed' );
+			}
+
+			$i++;
+		}
+		// Wait for the slaves (just in case we eg. hit a range of ids which need a lot of writes)
+		$this->waitForSlaves();
+
+		return $i;
+	}
+
+	/**
+	 * Wait for slaves (quietly)
+	 *
+	 * @todo: this should be in the Database class.
+	 * @todo: thresholds should be configurable
+	 *
+	 * @author Tim Starling (stolen from recompressTracked.php)
+	 */
+	protected function waitForSlaves() {
+		$lb = wfGetLB(); //TODO: allow foreign DB, get from $this->table
+
+		while ( true ) {
+			list( $host, $maxLag ) = $lb->getMaxLag();
+			if ( $maxLag < 2 ) {
+				break;
+			}
+
+			$this->report( "Slaves are lagged by $maxLag seconds, sleeping..." );
+			sleep( 5 );
+			$this->report( "Resuming..." );
+		}
+	}
+
+	/**
+	 * Forwards a message to the reporter, if one has been set; does nothing otherwise.
+	 *
+	 * @since 0.5
+	 *
+	 * @param string $msg
+	 */
+	protected function report( $msg ) {
+		if ( $this->reporter ) {
+			$this->reporter->reportMessage( $msg );
+		}
+	}
+
+}
diff --git a/repo/maintenance/rebuildItemsPerSite.php b/repo/maintenance/rebuildItemsPerSite.php
new file mode 100644
index 0000000..689d812
--- /dev/null
+++ b/repo/maintenance/rebuildItemsPerSite.php
@@ -0,0 +1,90 @@
+<?php
+
+namespace Wikibase;
+
+use Wikibase\Repo\WikibaseRepo;
+use LoggedUpdateMaintenance;
+
+$basePath = getenv( 'MW_INSTALL_PATH' ) !== false ? getenv( 'MW_INSTALL_PATH' ) : __DIR__ . '/../../../..';
+
+require_once $basePath . '/maintenance/Maintenance.php';
+
+/**
+ * Maintenance script for rebuilding the items_per_site table.
+ *
+ * @since 0.5
+ *
+ * @license GNU GPL v2+
+ * @author Marius Hoch < h...@online.de >
+ */
+class RebuildItemsPerSite extends LoggedUpdateMaintenance {
+
+	public function __construct() {
+		parent::__construct();
+
+		$this->mDescription = 'Rebuild the items_per_site table';
+
+		$this->addOption( 'batch-size', "Number of rows to update per batch (100 by default)", false, true );
+	}
+
+	/**
+	 * Runs ItemsPerSiteBuilder::rebuild() with an EntityPerPageIdPager created for 'item'.
+	 * @see LoggedUpdateMaintenance::doDBUpdates
+	 * @return bool Always true once the rebuild has been run
+	 */
+	public function doDBUpdates() {
+		if ( !defined( 'WB_VERSION' ) ) {
+			// Report on stderr and terminate with a non-zero exit status instead of a silent "successful" exit.
+			$this->error( "You need to have Wikibase enabled in order to use this maintenance script!", 1 );
+		}
+
+		$batchSize = intval( $this->getOption( 'batch-size', 100 ) );
+
+		$reporter = new \ObservableMessageReporter();
+		$reporter->registerReporterCallback(
+			array( $this, 'report' )
+		);
+
+		$siteLinkTable = new SiteLinkTable( 'wb_items_per_site', false );
+		// Use an uncached EntityLookup here to avoid memory leaks
+		$entityLookup = WikibaseRepo::getDefaultInstance()->getEntityLookup( 'uncached' );
+		$builder = new ItemsPerSiteBuilder(
+			$siteLinkTable,
+			$entityLookup
+		);
+
+		$builder->setReporter( $reporter );
+
+		$builder->setBatchSize( $batchSize );
+
+		$entityPerPage = new EntityPerPageTable();
+		$stream = new EntityPerPageIdPager( $entityPerPage, 'item' );
+		$builder->rebuild( $stream );
+
+		return true;
+	}
+
+	/**
+	 * Outputs a message via the output() method, followed by a newline.
+	 *
+	 * @since 0.4
+	 *
+	 * @param string $msg
+	 */
+	public function report( $msg ) {
+		$this->output( "$msg\n" );
+	}
+
+	/**
+	 * @see LoggedUpdateMaintenance::getUpdateKey
+	 *
+	 * @return string
+	 */
+	public function getUpdateKey() {
+		return 'Wikibase\RebuildItemsPerSite';
+	}
+
+}
+
+$maintClass = 'Wikibase\RebuildItemsPerSite';
+require_once( RUN_MAINTENANCE_IF_MAIN );
diff --git a/repo/tests/phpunit/includes/store/sql/ItemsPerSiteBuilderTest.php b/repo/tests/phpunit/includes/store/sql/ItemsPerSiteBuilderTest.php
new file mode 100644
index 0000000..4311b5d
--- /dev/null
+++ b/repo/tests/phpunit/includes/store/sql/ItemsPerSiteBuilderTest.php
@@ -0,0 +1,139 @@
+<?php
+
+namespace Wikibase\Test;
+
+use Wikibase\DataModel\Entity\Item;
+use Wikibase\DataModel\Entity\ItemId;
+use Wikibase\EntityIdPager;
+use Wikibase\ItemsPerSiteBuilder;
+use Wikibase\EntityLookup;
+use Wikibase\SiteLinkTable;
+
+/**
+ * @covers Wikibase\ItemsPerSiteBuilder
+ *
+ * @license GPL 2+
+ *
+ * @group Wikibase
+ * @group WikibaseStore
+ * @group WikibaseRepo
+ *
+ * @author Marius Hoch < h...@online.de >
+ */
+class ItemsPerSiteBuilderTest extends \MediaWikiTestCase {
+	/**
+	 * @return int Batch size handed to the builder and expected by the pager mock
+	 */
+	private function getBatchSize() {
+		return 5;
+	}
+
+	/**
+	 * @return ItemId A fresh ItemId for Q1234
+	 */
+	private function getTestItemId() {
+		return new ItemId( 'Q1234' );
+	}
+
+	/**
+	 * @return Item Empty Item carrying the test ID; created once and kept in a static variable
+	 */
+	private function getTestItem() {
+		static $item = null;
+
+		if ( !$item ) {
+			$item = Item::newEmpty();
+			$item->setId( $this->getTestItemId() );
+		}
+
+		return $item;
+	}
+
+	/**
+	 * @return SiteLinkTable Mock expecting exactly 10 saveLinksOfItem() calls with the test item
+	 */
+	private function getSiteLinkTableMock() {
+		$siteLinkTableMock = $this->getMockBuilder( '\Wikibase\SiteLinkTable' )
+			->disableOriginalConstructor()
+			->getMock();
+
+		$item = $this->getTestItem();
+		$siteLinkTableMock->expects( $this->exactly( 10 ) )
+			->method( 'saveLinksOfItem' )
+			->will( $this->returnValue( true ) )
+			->with( $this->equalTo( $item ) );
+
+		return $siteLinkTableMock;
+	}
+
+	/**
+	 * @return EntityLookup Mock expecting exactly 10 getEntity() calls for the test item ID
+	 */
+	private function getEntityLookupMock() {
+		$entityLookupMock = $this->getMockBuilder( '\Wikibase\EntityLookup' )
+			->disableOriginalConstructor()
+			->getMock();
+
+		$item = $this->getTestItem();
+		$entityLookupMock->expects( $this->exactly( 10 ) )
+			->method( 'getEntity' )
+			->will( $this->returnValue( $item ) )
+			->with( $this->equalTo( $this->getTestItemId() ) );
+
+		return $entityLookupMock;
+	}
+
+	/**
+	 * @return ItemsPerSiteBuilder Builder wired to the two mocks above
+	 */
+	private function getItemsPerSiteBuilder() {
+		return new ItemsPerSiteBuilder(
+			$this->getSiteLinkTableMock(),
+			$this->getEntityLookupMock()
+		);
+	}
+
+	/**
+	 * @return EntityIdPager Mock returning two batches of five IDs, then an empty array
+	 */
+	private function getEntityIdPager() {
+		$entityIdPager = $this->getMock( 'Wikibase\EntityIdPager' );
+
+		$itemIds = array(
+			$this->getTestItemId(),
+			$this->getTestItemId(),
+			$this->getTestItemId(),
+			$this->getTestItemId(),
+			$this->getTestItemId()
+		);
+
+		$entityIdPager->expects( $this->at( 0 ) )
+			->method( 'fetchIds' )
+			->will( $this->returnValue( $itemIds ) )
+			->with( $this->equalTo( $this->getBatchSize() ) );
+
+		$entityIdPager->expects( $this->at( 1 ) )
+			->method( 'fetchIds' )
+			->will( $this->returnValue( $itemIds ) )
+			->with( $this->equalTo( $this->getBatchSize() ) );
+
+		$entityIdPager->expects( $this->at( 2 ) )
+			->method( 'fetchIds' )
+			->will( $this->returnValue( array() ) )
+			->with( $this->equalTo( $this->getBatchSize() ) );
+
+		return $entityIdPager;
+	}
+
+	public function testRebuild() {
+		$itemsPerSiteBuilder = $this->getItemsPerSiteBuilder();
+		$itemsPerSiteBuilder->setBatchSize( $this->getBatchSize() );
+
+		$entityIdPager = $this->getEntityIdPager();
+		$itemsPerSiteBuilder->rebuild( $entityIdPager );
+
+		// The various mocks already verify they get called correctly,
+		// so no need for assertions
+		$this->assertTrue( true );
+	}
+}

--
To view, visit https://gerrit.wikimedia.org/r/125725
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Idcf6892873c10768f567e7bb717294d023c192b7 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Hoo man <h...@online.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits