ArielGlenn has uploaded a new change for review. https://gerrit.wikimedia.org/r/295387
Change subject: add option to XML dump stubs of page ranges with explicit rev_id ordering ...................................................................... add option to XML dump stubs of page ranges with explicit rev_id ordering [WIP] utterly untested. Change-Id: I94ca4a06235bdbed384bb997deb7432bb5aaa5b9 --- M includes/export/WikiExporter.php M maintenance/backup.inc M maintenance/dumpBackup.php 3 files changed, 15 insertions(+), 5 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/87/295387/1 diff --git a/includes/export/WikiExporter.php b/includes/export/WikiExporter.php index 54de26d..1da05bc 100644 --- a/includes/export/WikiExporter.php +++ b/includes/export/WikiExporter.php @@ -134,13 +134,14 @@ * @param int $start Inclusive lower limit (this id is included) * @param int $end Exclusive upper limit (this id is not included) * If 0, no upper limit. + * @param bool $orderRevs order revisions within pages in ascending order */ - public function pagesByRange( $start, $end ) { + public function pagesByRange( $start, $end, $orderRevs ) { $condition = 'page_id >= ' . intval( $start ); if ( $end ) { $condition .= ' AND page_id < ' . intval( $end ); } - $this->dumpFrom( $condition ); + $this->dumpFrom( $condition, $orderRevs ); } /** @@ -245,7 +246,7 @@ * @throws MWException * @throws Exception */ - protected function dumpFrom( $cond = '' ) { + protected function dumpFrom( $cond = '', $orderRevs = false ) { # For logging dumps... if ( $this->history & self::LOGS ) { $where = [ 'user_id = log_user' ]; @@ -333,6 +334,12 @@ } elseif ( $this->history & WikiExporter::FULL ) { # Full history dumps... $join['revision'] = [ 'INNER JOIN', 'page_id=rev_page' ]; + # query optimization for history stub dumps + if ( $this->text == WikiExporter::STUB && $orderRevs ) { + $opts[] = 'STRAIGHT_JOIN'; + $opts['ORDER BY'] = [ 'rev_page ASC', 'rev_id ASC' ]; + $opts['USE INDEX']['page'] = 'PRIMARY'; + } } elseif ( $this->history & WikiExporter::CURRENT ) { # Latest revision dumps... if ( $this->list_authors && $cond != '' ) { // List authors, if so desired @@ -369,7 +376,6 @@ if ( $this->buffer == WikiExporter::STREAM ) { $prev = $this->db->bufferResults( false ); } - $result = null; // Assuring $result is not undefined, if exception occurs early try { Hooks::run( 'ModifyExportQuery', diff --git a/maintenance/backup.inc b/maintenance/backup.inc index 3271fd6..db3af92 100644 --- a/maintenance/backup.inc +++ b/maintenance/backup.inc @@ -41,6 +41,7 @@ public $revEndId = 0; public $dumpUploads = false; public $dumpUploadFileContents = false; + public $orderRevs = false; protected $reportingInterval = 100; protected $pageCount = 0; @@ -271,7 +272,7 @@ } elseif ( is_null( $this->pages ) ) { # Page dumps: all or by page ID range if ( $this->startId || $this->endId ) { - $exporter->pagesByRange( $this->startId, $this->endId ); + $exporter->pagesByRange( $this->startId, $this->endId, $this->orderRevs ); } elseif ( $this->revStartId || $this->revEndId ) { $exporter->revsByRange( $this->revStartId, $this->revEndId ); } else { diff --git a/maintenance/dumpBackup.php b/maintenance/dumpBackup.php index d4255a0..60ee99d 100644 --- a/maintenance/dumpBackup.php +++ b/maintenance/dumpBackup.php @@ -50,6 +50,8 @@ $this->addOption( 'stable', 'Dump stable versions of pages' ); $this->addOption( 'revrange', 'Dump range of revisions specified by revstart and ' . 'revend parameters' ); + $this->addOption( 'orderrevs', 'Dump revisions in ascending revision order ' . + '(implies dump of a range of pages)'); $this->addOption( 'pagelist', 'Dump only pages included in the file', false, true ); // Options @@ -127,6 +129,7 @@ $this->skipFooter = $this->hasOption( 'skip-footer' ); $this->dumpUploads = $this->hasOption( 'uploads' ); $this->dumpUploadFileContents = $this->hasOption( 'include-files' ); + $this->orderRevs = $this->hasOption( 'orderrevs' ); } } -- To view, visit https://gerrit.wikimedia.org/r/295387 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I94ca4a06235bdbed384bb997deb7432bb5aaa5b9 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: ArielGlenn <ar...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits