Hoo man has uploaded a new change for review. https://gerrit.wikimedia.org/r/315102
Change subject: Re-Implement SqlEntityIdPager without wb_entity_per_page ...................................................................... Re-Implement SqlEntityIdPager without wb_entity_per_page Bug: T140888 Change-Id: Ie3efe0c07bee0dc23cd96a15857f864f2f8b3dab --- M repo/includes/Store/Sql/SqlEntityIdPager.php M repo/includes/Store/Sql/SqlEntityIdPagerFactory.php M repo/maintenance/dumpJson.php M repo/maintenance/dumpRdf.php M repo/maintenance/rebuildItemsPerSite.php M repo/tests/phpunit/includes/Store/Sql/SqlEntityIdPagerFactoryTest.php M repo/tests/phpunit/includes/Store/Sql/SqlEntityIdPagerTest.php 7 files changed, 236 insertions(+), 197 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/02/315102/1 diff --git a/repo/includes/Store/Sql/SqlEntityIdPager.php b/repo/includes/Store/Sql/SqlEntityIdPager.php index 5fc88dd..c5c2476 100644 --- a/repo/includes/Store/Sql/SqlEntityIdPager.php +++ b/repo/includes/Store/Sql/SqlEntityIdPager.php @@ -3,10 +3,13 @@ namespace Wikibase\Repo\Store\Sql; use InvalidArgumentException; +use ResultWrapper; use Wikibase\DataModel\Entity\EntityId; -use Wikibase\DataModel\Entity\Int32EntityId; -use Wikibase\Lib\EntityIdComposer; +use Wikibase\DataModel\Entity\EntityIdParser; +use Wikibase\DataModel\Entity\EntityIdParsingException; +use Wikibase\Lib\Store\EntityNamespaceLookup; use Wikibase\Repo\Store\EntityIdPager; +use Wikimedia\Assert\Assert; /** * SqlEntityIdPager is a cursor for iterating over the EntityIds stored in @@ -19,9 +22,14 @@ class SqlEntityIdPager implements EntityIdPager { /** - * @var EntityIdComposer + * @var EntityNamespaceLookup */ - private $entityIdComposer; + private $entityNamespaceLookup; + + /** + * @var EntityIdParser + */ + private $entityIdParser; /** * @var string|null @@ -29,26 +37,33 @@ private $entityType; /** - * @var EntityId|null - */ - private $position = null; - - /** * @var string */ private $redirectMode; /** - * @param EntityIdComposer $entityIdComposer + * Last page_id selected. + * + * @var int + */ + private $position = 0; + + /** + * @param EntityNamespaceLookup $entityNamespaceLookup + * @param EntityIdParser $entityIdParser * @param null|string $entityType The desired entity type, or null for any type. * @param string $redirectMode A EntityIdPager::XXX_REDIRECTS constant (default is NO_REDIRECTS). */ public function __construct( - EntityIdComposer $entityIdComposer, + EntityNamespaceLookup $entityNamespaceLookup, + EntityIdParser $entityIdParser, $entityType = null, $redirectMode = EntityIdPager::NO_REDIRECTS ) { - $this->entityIdComposer = $entityIdComposer; + Assert::parameterType( 'string|null', $entityType, '$entityType' ); + + $this->entityNamespaceLookup = $entityNamespaceLookup; + $this->entityIdParser = $entityIdParser; $this->entityType = $entityType; $this->redirectMode = $redirectMode; } @@ -69,99 +84,64 @@ * be empty if there are no more entities to list from the given offset. */ public function fetchIds( $limit ) { - $ids = $this->listEntities( $this->entityType, $limit, $this->position, $this->redirectMode ); + Assert::parameter( is_int( $limit ) && $limit > 0, '$limit', '$limit must be a positive integer' ); - if ( !empty( $ids ) ) { - $this->position = end( $ids ); - reset( $ids ); - } + $dbr = wfGetDB( DB_REPLICA ); + $rows = $dbr->select( + 'page', + [ 'page_id', 'page_title' ], + $this->getWhere(), + __METHOD__, + [ + 'ORDER BY' => 'page_id ASC', + 'LIMIT' => $limit + ] + ); - return $ids; + return $this->getEntityIdsFromRows( $rows ); } /** - * @param null|string $entityType The entity type to look for. - * @param int $limit The maximum number of IDs to return. - * @param EntityId|null $after Only return entities with IDs greater than this. - * @param string $redirects A XXX_REDIRECTS constant (default is NO_REDIRECTS). - * - * @throws InvalidArgumentException - * @return EntityId[] + * @return array */ - private function listEntities( $entityType, $limit, EntityId $after = null, $redirects = self::NO_REDIRECTS ) { - if ( $entityType === null ) { - $where = array(); - //NOTE: needs to be id/type, not type/id, according to the definition of the relevant - // index in wikibase.sql: wb_entity_per_page (epp_entity_id, epp_entity_type); - $orderBy = array( 'epp_entity_id', 'epp_entity_type' ); - } elseif ( !is_string( $entityType ) ) { - throw new InvalidArgumentException( '$entityType must be a string (or null)' ); + private function getWhere() { + $where = [ 'page_id > ' . (int)$this->position ]; + + if ( $this->entityType === null ) { + $where['page_namespace'] = $this->entityNamespaceLookup->getEntityNamespaces(); } else { - $where = array( 'epp_entity_type' => $entityType ); - // NOTE: If the type is fixed, don't use the type in the order; - // before changing this, check index usage. - $orderBy = array( 'epp_entity_id' ); + $where['page_namespace'] = $this->entityNamespaceLookup->getEntityNamespace( + $this->entityType + ); } - if ( $redirects === self::NO_REDIRECTS ) { - $where[] = 'epp_redirect_target IS NULL'; - } elseif ( $redirects === self::ONLY_REDIRECTS ) { - $where[] = 'epp_redirect_target IS NOT NULL'; + if ( $this->redirectMode === self::NO_REDIRECTS ) { + $where['page_is_redirect'] = 0; + } elseif ( $this->redirectMode === self::ONLY_REDIRECTS ) { + $where['page_is_redirect'] = 1; } - if ( !is_int( $limit ) || $limit < 1 ) { - throw new InvalidArgumentException( '$limit must be a positive integer' ); - } - - $dbr = wfGetDB( DB_REPLICA ); - - if ( $after ) { - if ( !( $after instanceof Int32EntityId ) ) { - throw new InvalidArgumentException( '$after must be an Int32EntityId' ); - } - - $numericId = (int)$after->getNumericId(); - - if ( $entityType === null ) { - // Ugly. About time we switch to qualified, string based IDs! - // NOTE: this must be consistent with the sort order, see above! - $where[] = '( ( epp_entity_type > ' . $dbr->addQuotes( $after->getEntityType() ) - . ' AND epp_entity_id = ' . $numericId . ' )' - . ' OR epp_entity_id > ' . $numericId . ' )'; - } else { - $where[] = 'epp_entity_id > ' . $numericId; - } - } - - $rows = $dbr->select( - 'wb_entity_per_page', - array( 'entity_type' => 'epp_entity_type', 'entity_id' => 'epp_entity_id' ), - $where, - __METHOD__, - array( - 'ORDER BY' => $orderBy, - // MySQL tends to use the epp_redirect_target key which has a very low selectivity - 'USE INDEX' => 'wb_epp_entity', - 'LIMIT' => $limit - ) - ); - - $ids = $this->getEntityIdsFromRows( $rows ); - return $ids; + return $where; } + /** + * @param ResultWrapper $rows + * + * @return EntityId[] + */ private function getEntityIdsFromRows( $rows ) { - $entities = array(); + $entityIds = []; foreach ( $rows as $row ) { try { - $entities[] = $this->entityIdComposer->composeEntityId( $row->entity_type, $row->entity_id ); - } catch ( InvalidArgumentException $ex ) { - wfLogWarning( 'Unsupported entity type "' . $row->entity_type . '"' ); + $this->position = (int)$row->page_id; + $entityIds[] = $this->entityIdParser->parse( $row->page_title ); + } catch ( EntityIdParsingException $ex ) { + wfLogWarning( 'Unexpected entity id serialization "' . $row->page_title . '"' ); } } - return $entities; + return $entityIds; } } diff --git a/repo/includes/Store/Sql/SqlEntityIdPagerFactory.php b/repo/includes/Store/Sql/SqlEntityIdPagerFactory.php index d03efee..8e1bff9 100644 --- a/repo/includes/Store/Sql/SqlEntityIdPagerFactory.php +++ b/repo/includes/Store/Sql/SqlEntityIdPagerFactory.php @@ -2,7 +2,8 @@ namespace Wikibase\Repo\Store\Sql; -use Wikibase\Lib\EntityIdComposer; +use Wikibase\DataModel\Entity\EntityIdParser; +use Wikibase\Lib\Store\EntityNamespaceLookup; use Wikibase\Repo\Store\EntityIdPager; /** @@ -14,15 +15,25 @@ class SqlEntityIdPagerFactory { /** - * @var EntityIdComposer + * @var EntityNamespaceLookup */ - private $entityIdComposer; + private $entityNamespaceLookup; /** - * @param EntityIdComposer $entityIdComposer + * @var EntityIdParser */ - public function __construct( EntityIdComposer $entityIdComposer ) { - $this->entityIdComposer = $entityIdComposer; + private $entityIdParser; + + /** + * @param EntityNamespaceLookup $entityNamespaceLookup + * @param EntityIdParser $entityIdParser + */ + public function __construct( + EntityNamespaceLookup $entityNamespaceLookup, + EntityIdParser $entityIdParser + ) { + $this->entityNamespaceLookup = $entityNamespaceLookup; + $this->entityIdParser = $entityIdParser; } /** @@ -33,7 +44,8 @@ */ public function newSqlEntityIdPager( $entityType = null, $redirectMode = EntityIdPager::NO_REDIRECTS ) { return new SqlEntityIdPager( - $this->entityIdComposer, + $this->entityNamespaceLookup, + $this->entityIdParser, $entityType, $redirectMode ); diff --git a/repo/maintenance/dumpJson.php b/repo/maintenance/dumpJson.php index 304081b..91fe4fe 100644 --- a/repo/maintenance/dumpJson.php +++ b/repo/maintenance/dumpJson.php @@ -79,7 +79,10 @@ public function execute() { if ( !$this->hasHadServicesSet ) { $wikibaseRepo = WikibaseRepo::getDefaultInstance(); - $sqlEntityIdPagerFactory = new SqlEntityIdPagerFactory( $wikibaseRepo->getEntityIdComposer() ); + $sqlEntityIdPagerFactory = new SqlEntityIdPagerFactory( + $wikibaseRepo->getEntityNamespaceLookup(), + $wikibaseRepo->getEntityIdParser() + ); $revisionLookup = $wikibaseRepo->getEntityRevisionLookup( 'uncached' ); $this->setServices( diff --git a/repo/maintenance/dumpRdf.php b/repo/maintenance/dumpRdf.php index f477639..75c197b 100644 --- a/repo/maintenance/dumpRdf.php +++ b/repo/maintenance/dumpRdf.php @@ -105,7 +105,10 @@ public function execute() { if ( !$this->hasHadServicesSet ) { $wikibaseRepo = WikibaseRepo::getDefaultInstance(); - $sqlEntityIdPagerFactory = new SqlEntityIdPagerFactory( $wikibaseRepo->getEntityIdComposer() ); + $sqlEntityIdPagerFactory = new SqlEntityIdPagerFactory( + $wikibaseRepo->getEntityNamespaceLookup(), + $wikibaseRepo->getEntityIdParser() + ); $this->setServices( $sqlEntityIdPagerFactory, diff --git a/repo/maintenance/rebuildItemsPerSite.php b/repo/maintenance/rebuildItemsPerSite.php index 416c1e9..febffb0 100644 --- a/repo/maintenance/rebuildItemsPerSite.php +++ b/repo/maintenance/rebuildItemsPerSite.php @@ -61,8 +61,11 @@ $builder->setReporter( $reporter ); $builder->setBatchSize( $batchSize ); - $entityPerPage = $store->newEntityPerPage(); - $stream = new SqlEntityIdPager( $entityPerPage, 'item' ); + $stream = new SqlEntityIdPager( + $wikibaseRepo->getEntityNamespaceLookup(), + $wikibaseRepo->getEntityIdParser(), + 'item' + ); // Now <s>kill</s> fix the table $builder->rebuild( $stream ); diff --git a/repo/tests/phpunit/includes/Store/Sql/SqlEntityIdPagerFactoryTest.php b/repo/tests/phpunit/includes/Store/Sql/SqlEntityIdPagerFactoryTest.php index 102db16..6dd26e5 100644 --- a/repo/tests/phpunit/includes/Store/Sql/SqlEntityIdPagerFactoryTest.php +++ b/repo/tests/phpunit/includes/Store/Sql/SqlEntityIdPagerFactoryTest.php @@ -2,8 +2,9 @@ namespace Wikibase\Repo\Tests\Store\Sql; -use MediaWikiTestCase; -use Wikibase\Lib\EntityIdComposer; +use PHPUnit_Framework_TestCase; +use Wikibase\DataModel\Entity\EntityIdParser; +use Wikibase\Lib\Store\EntityNamespaceLookup; use Wikibase\Repo\Store\Sql\SqlEntityIdPager; use Wikibase\Repo\Store\Sql\SqlEntityIdPagerFactory; @@ -17,10 +18,13 @@ * @license GPL-2.0+ * @author Marius Hoch */ -class SqlEntityIdPagerFactoryTest extends MediaWikiTestCase { +class SqlEntityIdPagerFactoryTest extends PHPUnit_Framework_TestCase { public function testNewSqlEntityIdPager() { - $factory = new SqlEntityIdPagerFactory( new EntityIdComposer( [] ) ); + $factory = new SqlEntityIdPagerFactory( + new EntityNamespaceLookup( [] ), + $this->getMock( EntityIdParser::class ) + ); $pager = $factory->newSqlEntityIdPager(); $this->assertInstanceOf( SqlEntityIdPager::class, $pager ); diff --git a/repo/tests/phpunit/includes/Store/Sql/SqlEntityIdPagerTest.php b/repo/tests/phpunit/includes/Store/Sql/SqlEntityIdPagerTest.php index 9b86d4b..e3f894c 100644 --- a/repo/tests/phpunit/includes/Store/Sql/SqlEntityIdPagerTest.php +++ b/repo/tests/phpunit/includes/Store/Sql/SqlEntityIdPagerTest.php @@ -3,18 +3,16 @@ namespace Wikibase\Repo\Tests\Store\Sql; use MediaWikiTestCase; -use MediaWiki\MediaWikiServices; -use Wikibase\DataModel\Entity\BasicEntityIdParser; use Wikibase\DataModel\Entity\EntityDocument; +use Wikibase\DataModel\Entity\EntityId; use Wikibase\DataModel\Entity\EntityRedirect; use Wikibase\DataModel\Entity\Item; use Wikibase\DataModel\Entity\ItemId; use Wikibase\DataModel\Entity\Property; use Wikibase\DataModel\Entity\PropertyId; -use Wikibase\Lib\EntityIdComposer; use Wikibase\Repo\Store\EntityIdPager; -use Wikibase\Repo\Store\Sql\EntityPerPageTable; use Wikibase\Repo\Store\Sql\SqlEntityIdPager; +use Wikibase\Repo\WikibaseRepo; /** * @covers Wikibase\Repo\Store\Sql\SqlEntityIdPager @@ -33,26 +31,53 @@ */ class SqlEntityIdPagerTest extends MediaWikiTestCase { + public function setUp() { + parent::setUp(); + + $this->tablesUsed[] = 'page'; + + // We need to initially empty the table + static $setUp = false; + if ( !$setUp ) { + $setUp = true; + wfGetDB( DB_MASTER )->delete( 'page', '*', __METHOD__ ); + } + } + /** * @param EntityDocument[] $entities * @param EntityRedirect[] $redirects */ - private function fillEntityPerPageTable( array $entities = [], array $redirects = [] ) { - $table = new EntityPerPageTable( - MediaWikiServices::getInstance()->getDBLoadBalancer(), - new BasicEntityIdParser() - ); - $table->clear(); - + private function insertEntities( array $entities = [], array $redirects = [] ) { + $pages = []; foreach ( $entities as $entity ) { - $pageId = $entity->getId()->getNumericId(); - $table->addEntityPage( $entity->getId(), $pageId ); + $pages[] = $this->getPageRow( $entity->getId(), false ); } foreach ( $redirects as $redirect ) { - $pageId = $redirect->getEntityId()->getNumericId(); - $table->addRedirectPage( $redirect->getEntityId(), $pageId, $redirect->getTargetId() ); + $pages[] = $this->getPageRow( $redirect->getEntityId(), true ); } + + $dbw = wfGetDB( DB_MASTER ); + $dbw->insert( + 'page', + $pages, + __METHOD__ + ); + } + + private function getPageRow( EntityId $entityId, $isRedirect ) { + $entityNamespaceLookup = WikibaseRepo::getDefaultInstance()->getEntityNamespaceLookup(); + + return [ + 'page_namespace' => $entityNamespaceLookup->getEntityNamespace( $entityId->getEntityType() ), + 'page_title' => $entityId->getSerialization(), + 'page_restrictions' => '', + 'page_random' => 0, + 'page_latest' => 0, + 'page_len' => 1, + 'page_is_redirect' => $isRedirect ? 1 : 0 + ]; } private function getIdStrings( array $entities ) { @@ -73,17 +98,28 @@ $expectedIds = $this->getIdStrings( $expected ); $actualIds = $this->getIdStrings( $actual ); - $this->assertArrayEquals( $expectedIds, $actualIds, $msg ); + $this->assertArrayEquals( $expectedIds, $actualIds, true ); } /** * @dataProvider listEntitiesProvider */ - public function testFetchIds( array $entities, array $redirects, $type, $limit, $calls, $redirectMode, array $expectedChunks ) { - $this->fillEntityPerPageTable( $entities, $redirects ); + public function testFetchIds( + $entityType, + $limit, + $calls, + $redirectMode, + array $expectedChunks, + array $entitiesToInsert = [], + array $redirectsToInsert = [] + ) { + $this->insertEntities( $entitiesToInsert, $redirectsToInsert ); + + $wikibaseRepo = WikibaseRepo::getDefaultInstance(); $pager = new SqlEntityIdPager( - new EntityIdComposer( [] ), - $type, + $wikibaseRepo->getEntityNamespaceLookup(), + $wikibaseRepo->getEntityIdParser(), + $entityType, $redirectMode ); @@ -100,95 +136,93 @@ return [ 'empty' => [ - [], - [], null, 100, 1, EntityIdPager::NO_REDIRECTS, [ [] ] ], - 'some entities' => [ - [ $item, $property ], - [ $redirect ], - null, - 100, - 1, - EntityIdPager::NO_REDIRECTS, - [ [ $property, $item ] ] - ], - 'two chunks' => [ - [ $item, $property ], - [ $redirect ], - null, - 1, - 2, - EntityIdPager::NO_REDIRECTS, - [ [ $property ], [ $item ] ] - ], - 'chunks with limit > 1' => [ - [ $item, $property ], - [ $redirect ], - null, - 2, - 3, - EntityIdPager::INCLUDE_REDIRECTS, - [ [ $property, $item ], [ $redirect ], [] ] - ], - 'four chunks (two empty)' => [ - [ $item, $property ], - [ $redirect ], - null, - 1, - 4, - EntityIdPager::NO_REDIRECTS, - [ [ $property ], [ $item ], [], [] ] - ], - 'include redirects' => [ - [ $item, $property ], - [ $redirect ], - null, - 100, - 1, - EntityIdPager::INCLUDE_REDIRECTS, - [ [ $property, $item, $redirect ] ] - ], - 'only redirects' => [ - [ $item, $property ], - [ $redirect ], - null, - 100, - 1, - EntityIdPager::ONLY_REDIRECTS, - [ [ $redirect ] ] - ], - 'just properties' => [ - [ $item, $property ], - [ $redirect ], - Property::ENTITY_TYPE, - 100, - 1, - EntityIdPager::NO_REDIRECTS, - [ [ $property ] ] - ], - 'limit' => [ - [ $item, $property ], - [ $redirect ], - Property::ENTITY_TYPE, - 1, - 1, - EntityIdPager::NO_REDIRECTS, - [ [ $property ] ] // current sort order is by numeric id, then type. - ], 'no matches' => [ - [ $property ], - [ $redirect ], Item::ENTITY_TYPE, 100, 1, EntityIdPager::NO_REDIRECTS, - [ [] ] + [ [] ], + [ $property ], + [ $redirect ] ], + 'some entities' => [ + null, + 100, + 1, + EntityIdPager::NO_REDIRECTS, + [ [ $property, $item ] ], + [ $property, $item ], + [ $redirect ] + ], + 'two chunks' => [ + null, + 1, + 2, + EntityIdPager::NO_REDIRECTS, + [ [ $property ], [ $item ] ], + [ $property, $item ], + [ $redirect ] + ], + 'chunks with limit > 1' => [ + null, + 2, + 3, + EntityIdPager::INCLUDE_REDIRECTS, + [ [ $property, $item ], [ $redirect ], [] ], + [ $property, $item ], + [ $redirect ] + ], + 'four chunks (two empty)' => [ + null, + 1, + 4, + EntityIdPager::NO_REDIRECTS, + [ [ $property ], [ $item ], [], [] ], + [ $property, $item ], + [ $redirect ] + ], + 'include redirects' => [ + null, + 100, + 1, + EntityIdPager::INCLUDE_REDIRECTS, + [ [ $property, $item, $redirect ] ], + [ $property, $item ], + [ $redirect ] + ], + 'only redirects' => [ + null, + 100, + 1, + EntityIdPager::ONLY_REDIRECTS, + [ [ $redirect ] ], + [ $property, $item ], + [ $redirect ] + ], + 'just properties' => [ + Property::ENTITY_TYPE, + 100, + 1, + EntityIdPager::NO_REDIRECTS, + [ [ $property ] ], + [ $property, $item ], + [ $redirect ] + ], + 'limit' => [ + Property::ENTITY_TYPE, + 1, + 1, + EntityIdPager::NO_REDIRECTS, + [ [ $property ] ], // current sort order is by numeric id, then type. + [ $property, $item ], + [ $redirect ] + ] ]; } -- To view, visit https://gerrit.wikimedia.org/r/315102 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ie3efe0c07bee0dc23cd96a15857f864f2f8b3dab Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Hoo man <h...@online.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits