jenkins-bot has submitted this change and it was merged. Change subject: Use single query for multiple revision lookups by pk ......................................................................
Use single query for multiple revision lookups by pk Change-Id: I7bf18898f8b4f91ca8fa4a85685f023106caddd9 --- M includes/Data/RevisionStorage.php A tests/RevisionStorageTest.php 2 files changed, 163 insertions(+), 20 deletions(-) Approvals: Matthias Mullie: Looks good to me, approved jenkins-bot: Verified diff --git a/includes/Data/RevisionStorage.php b/includes/Data/RevisionStorage.php index 1791b4b..b4db0df 100644 --- a/includes/Data/RevisionStorage.php +++ b/includes/Data/RevisionStorage.php @@ -42,6 +42,11 @@ abstract protected function updateRelated( array $changes, array $old ); abstract protected function removeRelated( array $row ); + /** + * @param DbFactory $dbFactory + * @param array|false List of externel store servers available for insert + * or false to disable. See $wgFlowExternalStore. + */ public function __construct( DbFactory $dbFactory, $externalStore ) { parent::__construct( $dbFactory ); $this->externalStore = $externalStore; @@ -101,29 +106,61 @@ } protected function findMultiInternal( array $queries, array $options = array() ) { + $queriedKeys = array_keys( reset( $queries ) ); // The findMulti doesn't map well to SQL, basically we are asking to answer a bunch // of queries. We can optimize those into a single query in a few select instances: - // Either - // All queries are feature queries for a unique value - // OR - // queries have limit 1 - // queries have no offset - // queries are sorted by the join field(which is time sorted) - // query keys are all in the related table and not the revision table - // + if ( isset( $options['LIMIT'] ) && $options['LIMIT'] == 1 ) { + // Find by primary key + if ( $options == array( 'LIMIT' => 1 ) && + $queriedKeys === array( 'rev_id' ) + ) { + return $this->findRevId( $queries ); + } - $queriedKeys = array_keys( reset( $queries ) ); - if ( $options['LIMIT'] === 1 && - !isset( $options['OFFSET'] ) && - count( $queriedKeys ) === 1 && - in_array( reset( $queriedKeys ), array( 'rev_id', $this->joinField(), $this->relatedPk() ) ) && - isset( $options['ORDER BY'] ) && count( $options['ORDER BY'] ) === 1 && - in_array( reset( $options['ORDER BY'] ), array( 'rev_id DESC', "{$this->joinField()} DESC" ) ) - ) { - return $this->findMostRecent( $queries ); + // Find most recent revision of a number of posts + if ( !isset( $options['OFFSET'] ) && + in_array( $queriedKeys, array( + array( $this->joinField() ), + array( $this->relatedPk() ), + ) ) && + isset( $options['ORDER BY'] ) && + $options['ORDER BY'] === array( 'rev_id DESC' ) + ) { + return $this->findMostRecent( $queries ); + } } - return $this->fallbackFindMulti( $queries, $options ); + // Fetch a list of revisions for each post + // @todo this is slow and inefficient. Mildly better solution would be if + // the index can ask directly for just the list of rev_id instead of whole rows, + // but would still have the need to run a bunch of queries serially. + if ( count( $options ) === 2 && + isset( $options['LIMIT'], $options['ORDER BY'] ) && + $options['ORDER BY'] === array( 'rev_id DESC' ) + ) { + return $this->fallbackFindMulti( $queries, $options ); + // unoptimizable query + } else { + wfDebugLog( __CLASS__, __FUNCTION__ + . ': Unoptimizable query for keys: ' + . implode( ',', array_keys( $queriedKeys ) ) + . ' with options ' + . \FormatJson::encode( $options ) + ); + return $this->fallbackFindMulti( $queries, $options ); + } + } + + protected function findRevId( array $queries ) { + $duplicator = new ResultDuplicator( array( 'rev_id' ), 1 ); + $pks = array(); + foreach ( $queries as $idx => $query ) { + $id = $query['rev_id']; + $duplicator->add( UUID::convertUUIDs( $query ), $idx ); + $pks[$id] = $id; + } + + return $this->findRevIdReal( $duplicator, $pks ); } protected function findMostRecent( array $queries ) { @@ -169,17 +206,21 @@ // Due to the grouping and max, we cant reliably get a full // columns info in the above query, forcing the join below // rather than just querying flow_revision. + return $this->findRevIdReal( $duplicator, $revisionIds ); + } + protected function findRevIdReal( ResultDuplicator $duplicator, array $revisionIds ) { // SELECT * from flow_tree_revision // JOIN flow_revision ON tree_rev_id = rev_id // WHERE tree_rev_id IN (...) + $dbr = $this->dbFactory->getDB( DB_MASTER ); $res = $dbr->select( array( 'flow_revision', 'rev' => $this->joinTable() ), '*', array( 'rev_id' => $revisionIds ), __METHOD__, array(), - array( 'rev' => array( 'JOIN', "rev_id = $joinField" ) ) + array( 'rev' => array( 'JOIN', 'rev_id = ' . $this->joinField() ) ) ); if ( !$res ) { // TODO: dont fail, but dont end up caching bad result either @@ -187,7 +228,7 @@ } foreach ( $res as $row ) { - $row = (array) $row; + $row = (array)$row; $duplicator->merge( $row, array( $row ) ); } diff --git a/tests/RevisionStorageTest.php b/tests/RevisionStorageTest.php new file mode 100644 index 0000000..ca5fbef --- /dev/null +++ b/tests/RevisionStorageTest.php @@ -0,0 +1,102 @@ +<?php + +namespace Flow\Tests; + +use Flow\Data\PostRevisionStorage; +use Flow\Data\ObjectManager; + +class RevisionStorageTest extends \MediaWikiTestCase { + + public static function issuesQueryCountProvider() { + return array( + array( + 'Query by rev_id issues one query', + // db queries issued + 1, + // queries + array( + array( 'rev_id' => 1 ), + array( 'rev_id' => 8 ), + array( 'rev_id' => 3 ), + ), + // query options + array( 'LIMIT' => 1 ) + ), + + array( + 'Query by rev_id issues one query with string limit', + // db queries issued + 1, + // queries + array( + array( 'rev_id' => 1 ), + array( 'rev_id' => 8 ), + array( 'rev_id' => 3 ), + ), + // query options + array( 'LIMIT' => '1' ) + ), + + array( + 'Query for most recent revision issues two queries', + // db queries issued + 2, + // queries + array( + array( 'tree_rev_descendant_id' => 19 ), + array( 'tree_rev_descendant_id' => 22 ), + array( 'tree_rev_descendant_id' => 4 ), + array( 'tree_rev_descendant_id' => 44 ), + ), + // query options + array( 'LIMIT' => 1, 'ORDER BY' => array( 'rev_id DESC' ) ), + ), + + ); + } + + /** + * @dataProvider issuesQueryCountProvider + */ + public function testIssuesQueryCount( $msg, $count, $queries, $options ) { + if ( !isset( $options['LIMIT'] ) || $options['LIMIT'] != 1 ) { + $this->fail( 'Can only generate result set for LIMIT = 1' ); + } + if ( count( $queries ) <= 2 && count( $queries ) != $count ) { + $this->fail( '<= 2 queries always issues the same number of queries' ); + } + + $result = array(); + foreach ( $queries as $query ) { + // this is not in any way a real result, but enough to get through + // the result processing + $result[] = (object)( $query + array( 'rev_id' => 42, 'tree_rev_id' => 42, 'rev_flags' => '' ) ); + } + + $treeRepo = $this->getMockBuilder( 'Flow\Repository\TreeRepository' ) + ->disableOriginalConstructor() + ->getMock(); + $factory = $this->mockDbFactory(); + // this expect is the assertion for the test + $factory->getDB( null )->expects( $this->exactly( $count ) ) + ->method( 'select' ) + ->will( $this->returnValue( $result ) ); + + $storage = new PostRevisionStorage( $factory, false, $treeRepo ); + + $storage->findMulti( $queries, $options ); + } + + protected function mockDbFactory() { + $dbw = $this->getMockBuilder( 'DatabaseMysql' ) + ->disableOriginalConstructor() + ->getMock(); + + $factory = $this->getMock( 'Flow\DbFactory' ); + $factory->expects( $this->any() ) + ->method( 'getDB' ) + ->will( $this->returnValue( $dbw ) ); + + return $factory; + } +} -- To view, visit https://gerrit.wikimedia.org/r/115416 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I7bf18898f8b4f91ca8fa4a85685f023106caddd9 Gerrit-PatchSet: 5 Gerrit-Project: mediawiki/extensions/Flow Gerrit-Branch: master Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: Bsitu <bs...@wikimedia.org> Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: Matthias Mullie <mmul...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits