jenkins-bot has submitted this change and it was merged.

Change subject: API: Improve queries for prop=revisions in enum mode
......................................................................


API: Improve queries for prop=revisions in enum mode

This reworks the queries to better use the indexes available, and at the
same time sorts results by rev_timestamp like they always should have
been rather than rev_id. See T88084 for details.

This also takes the opportunity to replace !is_null with !== null, since
it was annoying me while writing this.

Bug: T88084
Bug: T91883
Change-Id: Ie175c6014e75848e9dda6b413175c8575d1ef6af
---
M RELEASE-NOTES-1.26
M includes/api/ApiQueryRevisions.php
2 files changed, 70 insertions(+), 43 deletions(-)

Approvals:
  Aaron Schulz: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/RELEASE-NOTES-1.26 b/RELEASE-NOTES-1.26
index 9426635..57166ef 100644
--- a/RELEASE-NOTES-1.26
+++ b/RELEASE-NOTES-1.26
@@ -23,6 +23,8 @@
   tag is meant to be hidden from user interfaces.
 * action=import no longer allows both the namespace= and rootpage= parameters
   to be set. If they are both set, the value of rootpage= will be ignored.
+* prop=revision output in enum mode is now sorted by timestamp rather than
+  revision ID. This usually won't make any difference.
 
 === Action API internal changes in 1.26 ===
 
diff --git a/includes/api/ApiQueryRevisions.php 
b/includes/api/ApiQueryRevisions.php
index 552ca3b..1a65fe3 100644
--- a/includes/api/ApiQueryRevisions.php
+++ b/includes/api/ApiQueryRevisions.php
@@ -91,10 +91,10 @@
                // Enum mode can only be used when exactly one page is provided.
                // Enumerating revisions on multiple pages make it extremely
                // difficult to manage continuations and require additional SQL 
indexes
-               $enumRevMode = ( !is_null( $params['user'] ) || !is_null( 
$params['excludeuser'] ) ||
-                       !is_null( $params['limit'] ) || !is_null( 
$params['startid'] ) ||
-                       !is_null( $params['endid'] ) || $params['dir'] === 
'newer' ||
-                       !is_null( $params['start'] ) || !is_null( 
$params['end'] ) );
+               $enumRevMode = ( $params['user'] !== null || 
$params['excludeuser'] !== null ||
+                       $params['limit'] !== null || $params['startid'] !== 
null ||
+                       $params['endid'] !== null || $params['dir'] === 'newer' 
||
+                       $params['start'] !== null || $params['end'] !== null );
 
                $pageSet = $this->getPageSet();
                $pageCount = $pageSet->getGoodTitleCount();
@@ -149,7 +149,7 @@
                        }
                } else {
                        $this->limit = $this->getParameter( 'limit' ) ?: 10;
-                       $this->addFields( array( 'rev_id', 'rev_page' ) );
+                       $this->addFields( array( 'rev_id', 'rev_timestamp', 
'rev_page' ) );
                }
 
                if ( $this->fld_tags ) {
@@ -160,7 +160,7 @@
                        $this->addFields( 'ts_tags' );
                }
 
-               if ( !is_null( $params['tag'] ) ) {
+               if ( $params['tag'] !== null ) {
                        $this->addTables( 'change_tag' );
                        $this->addJoinConds(
                                array( 'change_tag' => array( 'INNER JOIN', 
array( 'rev_id=ct_rev_id' ) ) )
@@ -196,58 +196,77 @@
                }
 
                if ( $enumRevMode ) {
+                       // Indexes targeted:
+                       //  page_timestamp if we don't have rvuser
+                       //  page_user_timestamp if we have a logged-in rvuser
+                       //  page_timestamp or usertext_timestamp if we have an 
IP rvuser
+
                        // This is mostly to prevent parameter errors (and 
optimize SQL?)
-                       if ( !is_null( $params['startid'] ) && !is_null( 
$params['start'] ) ) {
+                       if ( $params['startid'] !== null && $params['start'] 
!== null ) {
                                $this->dieUsage( 'start and startid cannot be 
used together', 'badparams' );
                        }
 
-                       if ( !is_null( $params['endid'] ) && !is_null( 
$params['end'] ) ) {
+                       if ( $params['endid'] !== null && $params['end'] !== 
null ) {
                                $this->dieUsage( 'end and endid cannot be used 
together', 'badparams' );
                        }
 
-                       if ( !is_null( $params['user'] ) && !is_null( 
$params['excludeuser'] ) ) {
+                       if ( $params['user'] !== null && $params['excludeuser'] 
!== null ) {
                                $this->dieUsage( 'user and excludeuser cannot 
be used together', 'badparams' );
                        }
 
-                       // Continuing effectively uses startid. But we can't 
use rvstartid
-                       // directly, because there is no way to tell the client 
to ''not''
-                       // send rvstart if it sent it in the original query. So 
instead we
-                       // send the continuation startid as rvcontinue, and 
ignore both
-                       // rvstart and rvstartid when that is supplied.
-                       if ( !is_null( $params['continue'] ) ) {
-                               $params['startid'] = $params['continue'];
-                               $params['start'] = null;
+                       if ( $params['continue'] !== null ) {
+                               $cont = explode( '|', $params['continue'] );
+                               $this->dieContinueUsageIf( count( $cont ) != 2 
);
+                               $op = ( $params['dir'] === 'newer' ? '>' : '<' 
);
+                               $continueTimestamp = $db->addQuotes( 
$db->timestamp( $cont[0] ) );
+                               $continueId = (int)$cont[1];
+                               $this->dieContinueUsageIf( $continueId != 
$cont[1] );
+                               $this->addWhere( "rev_timestamp $op 
$continueTimestamp OR " .
+                                       "(rev_timestamp = $continueTimestamp 
AND " .
+                                       "rev_id $op= $continueId)"
+                               );
                        }
 
-                       // This code makes an assumption that sorting by rev_id 
and rev_timestamp produces
-                       // the same result. This way users may request 
revisions starting at a given time,
-                       // but to page through results use the rev_id returned 
after each page.
-                       // Switching to rev_id removes the potential problem of 
having more than
-                       // one row with the same timestamp for the same page.
-                       // The order needs to be the same as start parameter to 
avoid SQL filesort.
-                       if ( is_null( $params['startid'] ) && is_null( 
$params['endid'] ) ) {
-                               $this->addTimestampWhereRange( 'rev_timestamp', 
$params['dir'],
-                                       $params['start'], $params['end'] );
-                       } else {
-                               $this->addWhereRange( 'rev_id', $params['dir'],
-                                       $params['startid'], $params['endid'] );
-                               // One of start and end can be set
-                               // If neither is set, this does nothing
-                               $this->addTimestampWhereRange( 'rev_timestamp', 
$params['dir'],
-                                       $params['start'], $params['end'], false 
);
+                       // Query optimization: since we're targeting ranges of
+                       // rev_timestamp,rev_id, if we're given an id then 
extract the
+                       // corresponding timestamp from the DB.
+                       // Note we don't use Revision::getTimestampFromId() 
since we don't
+                       // have a Title to pass it and there's not any real 
need to create one.
+                       if ( $params['startid'] !== null ) {
+                               $params['start'] = $db->selectField( 
'revision', 'rev_timestamp',
+                                       array( 'rev_id' => $params['startid'] 
), __METHOD__ );
                        }
+                       if ( $params['endid'] !== null ) {
+                               $params['end'] = $db->selectField( 'revision', 
'rev_timestamp',
+                                       array( 'rev_id' => $params['endid'] ), 
__METHOD__ );
+                       }
+
+                       $this->addTimestampWhereRange( 'rev_timestamp', 
$params['dir'],
+                               $params['start'], $params['end'] );
+                       $this->addWhereRange( 'rev_id', $params['dir'],
+                               $params['startid'], $params['endid'] );
 
                        // There is only one ID, use it
                        $ids = array_keys( $pageSet->getGoodTitles() );
                        $this->addWhereFld( 'rev_page', reset( $ids ) );
 
-                       if ( !is_null( $params['user'] ) ) {
-                               $this->addWhereFld( 'rev_user_text', 
$params['user'] );
-                       } elseif ( !is_null( $params['excludeuser'] ) ) {
-                               $this->addWhere( 'rev_user_text != ' .
-                                       $db->addQuotes( $params['excludeuser'] 
) );
+                       if ( $params['user'] !== null ) {
+                               $user = User::newFromName( $params['user'] );
+                               if ( $user && $user->getId() > 0 ) {
+                                       $this->addWhereFld( 'rev_user', 
$user->getId() );
+                               } else {
+                                       $this->addWhereFld( 'rev_user_text', 
$params['user'] );
+                               }
+                       } elseif ( $params['excludeuser'] !== null ) {
+                               $user = User::newFromName( 
$params['excludeuser'] );
+                               if ( $user && $user->getId() > 0 ) {
+                                       $this->addWhere( 'rev_user != ' . 
$user->getId() );
+                               } else {
+                                       $this->addWhere( 'rev_user_text != ' .
+                                               $db->addQuotes( 
$params['excludeuser'] ) );
+                               }
                        }
-                       if ( !is_null( $params['user'] ) || !is_null( 
$params['excludeuser'] ) ) {
+                       if ( $params['user'] !== null || $params['excludeuser'] 
!== null ) {
                                // Paranoia: avoid brute force searches (bug 
17342)
                                if ( !$this->getUser()->isAllowed( 
'deletedhistory' ) ) {
                                        $bitmask = Revision::DELETED_USER;
@@ -261,16 +280,20 @@
                                }
                        }
                } elseif ( $revCount > 0 ) {
+                       // Always targets the PRIMARY index
+
                        $revs = $pageSet->getLiveRevisionIDs();
 
                        // Get all revision IDs
                        $this->addWhereFld( 'rev_id', array_keys( $revs ) );
 
-                       if ( !is_null( $params['continue'] ) ) {
+                       if ( $params['continue'] !== null ) {
                                $this->addWhere( 'rev_id >= ' . intval( 
$params['continue'] ) );
                        }
                        $this->addOption( 'ORDER BY', 'rev_id' );
                } elseif ( $pageCount > 0 ) {
+                       // Always targets the rev_page_id index
+
                        $titles = $pageSet->getGoodTitles();
 
                        // When working in multi-page non-enumeration mode,
@@ -282,7 +305,7 @@
                        // Every time someone relies on equality propagation, 
god kills a kitten :)
                        $this->addWhereFld( 'rev_page', array_keys( $titles ) );
 
-                       if ( !is_null( $params['continue'] ) ) {
+                       if ( $params['continue'] !== null ) {
                                $cont = explode( '|', $params['continue'] );
                                $this->dieContinueUsageIf( count( $cont ) != 2 
);
                                $pageid = intval( $cont[0] );
@@ -312,7 +335,8 @@
                                // We've reached the one extra which shows that 
there are
                                // additional pages to be had. Stop here...
                                if ( $enumRevMode ) {
-                                       $this->setContinueEnumParameter( 
'continue', intval( $row->rev_id ) );
+                                       $this->setContinueEnumParameter( 
'continue',
+                                               $row->rev_timestamp . '|' . 
intval( $row->rev_id ) );
                                } elseif ( $revCount > 0 ) {
                                        $this->setContinueEnumParameter( 
'continue', intval( $row->rev_id ) );
                                } else {
@@ -344,7 +368,8 @@
                                $fit = $this->addPageSubItem( $row->rev_page, 
$rev, 'rev' );
                                if ( !$fit ) {
                                        if ( $enumRevMode ) {
-                                               
$this->setContinueEnumParameter( 'continue', intval( $row->rev_id ) );
+                                               
$this->setContinueEnumParameter( 'continue',
+                                                       $row->rev_timestamp . 
'|' . intval( $row->rev_id ) );
                                        } elseif ( $revCount > 0 ) {
                                                
$this->setContinueEnumParameter( 'continue', intval( $row->rev_id ) );
                                        } else {

-- 
To view, visit https://gerrit.wikimedia.org/r/188843
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ie175c6014e75848e9dda6b413175c8575d1ef6af
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Anomie <bjor...@wikimedia.org>
Gerrit-Reviewer: Aaron Schulz <asch...@wikimedia.org>
Gerrit-Reviewer: Legoktm <legoktm.wikipe...@gmail.com>
Gerrit-Reviewer: Springle <sprin...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to