jenkins-bot has submitted this change and it was merged.

Change subject: Use single query for multiple revision lookups by pk
......................................................................


Use single query for multiple revision lookups by pk

Change-Id: I7bf18898f8b4f91ca8fa4a85685f023106caddd9
---
M includes/Data/RevisionStorage.php
A tests/RevisionStorageTest.php
2 files changed, 163 insertions(+), 20 deletions(-)

Approvals:
  Matthias Mullie: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/includes/Data/RevisionStorage.php 
b/includes/Data/RevisionStorage.php
index 1791b4b..b4db0df 100644
--- a/includes/Data/RevisionStorage.php
+++ b/includes/Data/RevisionStorage.php
@@ -42,6 +42,11 @@
        abstract protected function updateRelated( array $changes, array $old );
        abstract protected function removeRelated( array $row );
 
+       /**
+        * @param DbFactory $dbFactory
+        * @param array|false List of externel store servers available for 
insert
+        *  or false to disable. See $wgFlowExternalStore.
+        */
        public function __construct( DbFactory $dbFactory, $externalStore ) {
                parent::__construct( $dbFactory );
                $this->externalStore = $externalStore;
@@ -101,29 +106,61 @@
        }
 
        protected function findMultiInternal( array $queries, array $options = 
array() ) {
+               $queriedKeys = array_keys( reset( $queries ) );
                // The findMulti doesn't map well to SQL, basically we are 
asking to answer a bunch
                // of queries. We can optimize those into a single query in a 
few select instances:
-               // Either
-               //   All queries are feature queries for a unique value
-               // OR
-               //   queries have limit 1
-               //   queries have no offset
-               //   queries are sorted by the join field(which is time sorted)
-               //   query keys are all in the related table and not the 
revision table
-               //
+               if ( isset( $options['LIMIT'] ) && $options['LIMIT'] == 1 ) {
+                       // Find by primary key
+                       if ( $options == array( 'LIMIT' => 1 ) &&
+                               $queriedKeys === array( 'rev_id' )
+                       ) {
+                               return $this->findRevId( $queries );
+                       }
 
-               $queriedKeys = array_keys( reset( $queries ) );
-               if ( $options['LIMIT'] === 1 &&
-                       !isset( $options['OFFSET'] ) &&
-                       count( $queriedKeys ) === 1 &&
-                       in_array( reset( $queriedKeys ), array( 'rev_id', 
$this->joinField(), $this->relatedPk() ) ) &&
-                       isset( $options['ORDER BY'] ) && count( $options['ORDER 
BY'] ) === 1 &&
-                       in_array( reset( $options['ORDER BY'] ), array( 'rev_id 
DESC', "{$this->joinField()} DESC" ) )
-               ) {
-                       return $this->findMostRecent( $queries );
+                       // Find most recent revision of a number of posts
+                       if ( !isset( $options['OFFSET'] ) &&
+                               in_array( $queriedKeys, array(
+                                       array( $this->joinField() ),
+                                       array( $this->relatedPk() ),
+                               ) ) &&
+                               isset( $options['ORDER BY'] ) &&
+                               $options['ORDER BY'] === array( 'rev_id DESC' )
+                       ) {
+                               return $this->findMostRecent( $queries );
+                       }
                }
 
-               return $this->fallbackFindMulti( $queries, $options );
+               // Fetch a list of revisions for each post
+               // @todo this is slow and inefficient.  Mildly better solution 
would be if
+               // the index can ask directly for just the list of rev_id 
instead of whole rows,
+               // but would still have the need to run a bunch of queries 
serially.
+               if ( count( $options ) === 2 &&
+                       isset( $options['LIMIT'], $options['ORDER BY'] ) &&
+                       $options['ORDER BY'] === array( 'rev_id DESC' )
+               ) {
+                       return $this->fallbackFindMulti( $queries, $options );
+               // unoptimizable query
+               } else {
+                       wfDebugLog( __CLASS__, __FUNCTION__
+                               . ': Unoptimizable query for keys: '
+                               . implode( ',', array_keys( $queriedKeys ) )
+                               . ' with options '
+                               . \FormatJson::encode( $options )
+                       );
+                       return $this->fallbackFindMulti( $queries, $options );
+               }
+       }
+
+       protected function findRevId( array $queries ) {
+               $duplicator = new ResultDuplicator( array( 'rev_id' ), 1 );
+               $pks = array();
+               foreach ( $queries as $idx => $query ) {
+                       $id = $query['rev_id'];
+                       $duplicator->add( UUID::convertUUIDs( $query ), $idx );
+                       $pks[$id] = $id;
+               }
+
+               return $this->findRevIdReal( $duplicator, $pks );
        }
 
        protected function findMostRecent( array $queries ) {
@@ -169,17 +206,21 @@
                // Due to the grouping and max, we cant reliably get a full
                // columns info in the above query, forcing the join below
                // rather than just querying flow_revision.
+               return $this->findRevIdReal( $duplicator, $revisionIds );
+       }
 
+       protected function findRevIdReal( ResultDuplicator $duplicator, array 
$revisionIds ) {
                //  SELECT * from flow_tree_revision
                //        JOIN flow_revision ON tree_rev_id = rev_id
                //   WHERE tree_rev_id IN (...)
+               $dbr = $this->dbFactory->getDB( DB_MASTER );
                $res = $dbr->select(
                        array( 'flow_revision', 'rev' => $this->joinTable() ),
                        '*',
                        array( 'rev_id' => $revisionIds ),
                        __METHOD__,
                        array(),
-                       array( 'rev' => array( 'JOIN', "rev_id = $joinField" ) )
+                       array( 'rev' => array( 'JOIN', 'rev_id = ' . 
$this->joinField() ) )
                );
                if ( !$res ) {
                        // TODO: dont fail, but dont end up caching bad result 
either
@@ -187,7 +228,7 @@
                }
 
                foreach ( $res as $row ) {
-                       $row = (array) $row;
+                       $row = (array)$row;
                        $duplicator->merge( $row, array( $row ) );
                }
 
diff --git a/tests/RevisionStorageTest.php b/tests/RevisionStorageTest.php
new file mode 100644
index 0000000..ca5fbef
--- /dev/null
+++ b/tests/RevisionStorageTest.php
@@ -0,0 +1,102 @@
+<?php
+
+namespace Flow\Tests;
+
+use Flow\Data\PostRevisionStorage;
+use Flow\Data\ObjectManager;
+
+class RevisionStorageTest extends \MediaWikiTestCase {
+
+       public static function issuesQueryCountProvider() {
+               return array(
+                       array(
+                               'Query by rev_id issues one query',
+                               // db queries issued
+                               1,
+                               // queries
+                               array(
+                                       array( 'rev_id' => 1 ),
+                                       array( 'rev_id' => 8 ),
+                                       array( 'rev_id' => 3 ),
+                               ),
+                               // query options
+                               array( 'LIMIT' => 1 )
+                       ),
+
+                       array(
+                               'Query by rev_id issues one query with string 
limit',
+                               // db queries issued
+                               1,
+                               // queries
+                               array(
+                                       array( 'rev_id' => 1 ),
+                                       array( 'rev_id' => 8 ),
+                                       array( 'rev_id' => 3 ),
+                               ),
+                               // query options
+                               array( 'LIMIT' => '1' )
+                       ),
+
+                       array(
+                               'Query for most recent revision issues two 
queries',
+                               // db queries issued
+                               2,
+                               // queries
+                               array(
+                                       array( 'tree_rev_descendant_id' => 19 ),
+                                       array( 'tree_rev_descendant_id' => 22 ),
+                                       array( 'tree_rev_descendant_id' => 4 ),
+                                       array( 'tree_rev_descendant_id' => 44 ),
+                               ),
+                               // query options
+                               array( 'LIMIT' => 1, 'ORDER BY' => array( 
'rev_id DESC' ) ),
+                       ),
+
+               );
+       }
+
+       /**
+        * @dataProvider issuesQueryCountProvider
+        */
+       public function testIssuesQueryCount( $msg, $count, $queries, $options 
) {
+               if ( !isset( $options['LIMIT'] ) || $options['LIMIT'] != 1 ) {
+                       $this->fail( 'Can only generate result set for LIMIT = 
1' );
+               }
+               if ( count( $queries ) <= 2 && count( $queries ) != $count ) {
+                       $this->fail( '<= 2 queries always issues the same 
number of queries' );
+               }
+
+               $result = array();
+               foreach ( $queries as $query ) {
+                       // this is not in any way a real result, but enough to 
get through
+                       // the result processing
+                       $result[] = (object)( $query + array( 'rev_id' => 42, 
'tree_rev_id' => 42, 'rev_flags' => '' ) );
+               }
+
+               $treeRepo = $this->getMockBuilder( 
'Flow\Repository\TreeRepository' )
+                       ->disableOriginalConstructor()
+                       ->getMock();
+               $factory = $this->mockDbFactory();
+               // this expect is the assertion for the test
+               $factory->getDB( null )->expects( $this->exactly( $count ) )
+                       ->method( 'select' )
+                       ->will( $this->returnValue( $result ) );
+
+               $storage = new PostRevisionStorage( $factory, false, $treeRepo 
);
+
+               $storage->findMulti( $queries, $options );
+       }
+
+       protected function mockDbFactory() {
+               $dbw = $this->getMockBuilder( 'DatabaseMysql' )
+                       ->disableOriginalConstructor()
+                       ->getMock();
+
+               $factory = $this->getMock( 'Flow\DbFactory' );
+               $factory->expects( $this->any() )
+                       ->method( 'getDB' )
+                       ->will( $this->returnValue( $dbw ) );
+
+               return $factory;
+       }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/115416
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I7bf18898f8b4f91ca8fa4a85685f023106caddd9
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/Flow
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org>
Gerrit-Reviewer: Bsitu <bs...@wikimedia.org>
Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org>
Gerrit-Reviewer: Matthias Mullie <mmul...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to