jenkins-bot has submitted this change and it was merged. Change subject: add option to XML dump stubs of page ranges with explicit rev_id ordering ......................................................................
add option to XML dump stubs of page ranges with explicit rev_id ordering. Tested for stubs, text, and logging dumps, with and without start/end values and with and without orderrevs; seems to work as expected, with the appropriate changes to the query. Bug: T29112 Change-Id: I94ca4a06235bdbed384bb997deb7432bb5aaa5b9 --- M includes/export/WikiExporter.php M maintenance/backup.inc M maintenance/dumpBackup.php 3 files changed, 29 insertions(+), 9 deletions(-) Approvals: Chad: Looks good to me, approved jenkins-bot: Verified diff --git a/includes/export/WikiExporter.php b/includes/export/WikiExporter.php index 54de26d..c1f2d59 100644 --- a/includes/export/WikiExporter.php +++ b/includes/export/WikiExporter.php @@ -134,13 +134,21 @@ * @param int $start Inclusive lower limit (this id is included) * @param int $end Exclusive upper limit (this id is not included) * If 0, no upper limit. + * @param bool $orderRevs order revisions within pages in ascending order */ - public function pagesByRange( $start, $end ) { - $condition = 'page_id >= ' . intval( $start ); - if ( $end ) { - $condition .= ' AND page_id < ' . intval( $end ); + public function pagesByRange( $start, $end, $orderRevs ) { + if ( $orderRevs ) { + $condition = 'rev_page >= ' . intval( $start ); + if ( $end ) { + $condition .= ' AND rev_page < ' . intval( $end ); + } + } else { + $condition = 'page_id >= ' . intval( $start ); + if ( $end ) { + $condition .= ' AND page_id < ' . intval( $end ); + } } - $this->dumpFrom( $condition ); + $this->dumpFrom( $condition, $orderRevs ); } /** @@ -245,7 +253,7 @@ * @throws MWException * @throws Exception */ - protected function dumpFrom( $cond = '' ) { + protected function dumpFrom( $cond = '', $orderRevs = false ) { # For logging dumps... if ( $this->history & self::LOGS ) { $where = [ 'user_id = log_user' ]; @@ -332,7 +340,16 @@ } } elseif ( $this->history & WikiExporter::FULL ) { # Full history dumps... 
- $join['revision'] = [ 'INNER JOIN', 'page_id=rev_page' ]; + # query optimization for history stub dumps + if ( $this->text == WikiExporter::STUB && $orderRevs ) { + $tables = [ 'revision', 'page' ]; + $opts[] = 'STRAIGHT_JOIN'; + $opts['ORDER BY'] = [ 'rev_page ASC', 'rev_id ASC' ]; + $opts['USE INDEX']['revision'] = 'rev_page_id'; + $join['page'] = [ 'INNER JOIN', 'rev_page=page_id' ]; + } else { + $join['revision'] = [ 'INNER JOIN', 'page_id=rev_page' ]; + } } elseif ( $this->history & WikiExporter::CURRENT ) { # Latest revision dumps... if ( $this->list_authors && $cond != '' ) { // List authors, if so desired @@ -369,7 +386,6 @@ if ( $this->buffer == WikiExporter::STREAM ) { $prev = $this->db->bufferResults( false ); } - $result = null; // Assuring $result is not undefined, if exception occurs early try { Hooks::run( 'ModifyExportQuery', diff --git a/maintenance/backup.inc b/maintenance/backup.inc index 3271fd6..db3af92 100644 --- a/maintenance/backup.inc +++ b/maintenance/backup.inc @@ -41,6 +41,7 @@ public $revEndId = 0; public $dumpUploads = false; public $dumpUploadFileContents = false; + public $orderRevs = false; protected $reportingInterval = 100; protected $pageCount = 0; @@ -271,7 +272,7 @@ } elseif ( is_null( $this->pages ) ) { # Page dumps: all or by page ID range if ( $this->startId || $this->endId ) { - $exporter->pagesByRange( $this->startId, $this->endId ); + $exporter->pagesByRange( $this->startId, $this->endId, $this->orderRevs ); } elseif ( $this->revStartId || $this->revEndId ) { $exporter->revsByRange( $this->revStartId, $this->revEndId ); } else { diff --git a/maintenance/dumpBackup.php b/maintenance/dumpBackup.php index d4255a0..9bf1222 100644 --- a/maintenance/dumpBackup.php +++ b/maintenance/dumpBackup.php @@ -50,6 +50,8 @@ $this->addOption( 'stable', 'Dump stable versions of pages' ); $this->addOption( 'revrange', 'Dump range of revisions specified by revstart and ' . 
'revend parameters' ); + $this->addOption( 'orderrevs', 'Dump revisions in ascending revision order ' . + '(implies dump of a range of pages)' ); $this->addOption( 'pagelist', 'Dump only pages included in the file', false, true ); // Options @@ -127,6 +129,7 @@ $this->skipFooter = $this->hasOption( 'skip-footer' ); $this->dumpUploads = $this->hasOption( 'uploads' ); $this->dumpUploadFileContents = $this->hasOption( 'include-files' ); + $this->orderRevs = $this->hasOption( 'orderrevs' ); } } -- To view, visit https://gerrit.wikimedia.org/r/296614 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I94ca4a06235bdbed384bb997deb7432bb5aaa5b9 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: wmf/1.28.0-wmf.7 Gerrit-Owner: ArielGlenn <ar...@wikimedia.org> Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org> Gerrit-Reviewer: Chad <ch...@wikimedia.org> Gerrit-Reviewer: Parent5446 <tylerro...@gmail.com> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits