Matthias Mullie has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/183269

Change subject: Allow multiple types in Reindexer
......................................................................

Allow multiple types in Reindexer

Change-Id: I044594e060cebf52890aa0b45a99cd625af142b2
---
M includes/Maintenance/ConfigUtils.php
M includes/Maintenance/Reindexer.php
M includes/Maintenance/Validators/IndexAllAliasValidator.php
M maintenance/updateOneSearchIndexConfig.php
4 files changed, 50 insertions(+), 33 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/69/183269/1

diff --git a/includes/Maintenance/ConfigUtils.php b/includes/Maintenance/ConfigUtils.php
index 6c980e3..751c791 100644
--- a/includes/Maintenance/ConfigUtils.php
+++ b/includes/Maintenance/ConfigUtils.php
@@ -98,7 +98,7 @@
                        } else {
                                $identifier = 'first';
                        }
-                       $this->output( "${typeName}_${identifier}\n ");
+                       $this->output( "${typeName}_${identifier}\n");
                        return $identifier;
                }
                return $option;
diff --git a/includes/Maintenance/Reindexer.php b/includes/Maintenance/Reindexer.php
index 3f5b170..4610861 100644
--- a/includes/Maintenance/Reindexer.php
+++ b/includes/Maintenance/Reindexer.php
@@ -47,14 +47,14 @@
        private $specificIndexName;
 
        /**
-        * @var Type
+        * @var Type[]
         */
-       private $type;
+       private $types;
 
        /**
-        * @var Type
+        * @var Type[]
         */
-       private $oldType;
+       private $oldTypes;
 
        /**
         * @var int
@@ -94,8 +94,8 @@
        /**
         * @param Index $index
         * @param \ElasticaConnection $connection
-        * @param Type $type
-        * @param Type $oldType
+        * @param Type[] $types
+        * @param Type[] $oldTypes
         * @param int $shardCount
         * @param string $replicaCount
         * @param int $connectionTimeout
@@ -103,14 +103,14 @@
         * @param array $mappingConfig
         * @param Maintenance $out
         */
-       public function __construct( Index $index, \ElasticaConnection 
$connection, Type $type, Type $oldType, $shardCount, $replicaCount, 
$connectionTimeout, array $mergeSettings, array $mappingConfig, Maintenance 
$out = null ) {
+       public function __construct( Index $index, \ElasticaConnection 
$connection, array $types, array $oldTypes, $shardCount, $replicaCount, 
$connectionTimeout, array $mergeSettings, array $mappingConfig, Maintenance 
$out = null ) {
                // @todo: this constructor has too many arguments - refactor!
                $this->index = $index;
                $this->client = $this->index->getClient();
                $this->specificIndexName = $this->index->getName();
                $this->connection = $connection;
-               $this->type = $type;
-               $this->oldType = $oldType;
+               $this->types = $types;
+               $this->oldTypes = $oldTypes;
                $this->shardCount = $shardCount;
                $this->replicaCount = $replicaCount;
                $this->connectionTimeout = $connectionTimeout;
@@ -150,7 +150,10 @@
 
                        switch ( $forkResult ) {
                                case 'child':
-                                       $this->reindexInternal( $processes, 
$fork->getChildNumber(), $chunkSize, $retryAttempts );
+                                       foreach ( $this->types as $i => $type ) 
{
+                                               $oldType = $this->oldTypes[$i];
+                                               $this->reindexInternal( $type, 
$oldType, $processes, $fork->getChildNumber(), $chunkSize, $retryAttempts );
+                                       }
                                        die( 0 );
                                case 'done':
                                        break;
@@ -161,19 +164,25 @@
                        $this->outputIndented( "Verifying counts..." );
                        // We can't verify counts are exactly equal because they won't be - we still push updates into
                        // the old index while reindexing the new one.
-                       $oldCount = (float) $this->oldType->count();
-                       $this->index->refresh();
-                       $newCount = (float) $this->type->count();
-                       $difference = $oldCount > 0 ? abs( $oldCount - 
$newCount ) / $oldCount : 0;
-                       if ( $difference > $acceptableCountDeviation ) {
-                               $this->output( "Not close enough!  
old=$oldCount new=$newCount difference=$difference\n" );
-                               $this->error( 'Failed to load index - counts 
not close enough.  ' .
-                                       "old=$oldCount new=$newCount 
difference=$difference.  " .
-                                       'Check for warnings above.', 1 );
+                       foreach ( $this->types as $i => $type ) {
+                               $oldType = $this->oldTypes[$i];
+                               $oldCount = (float) $oldType->count();
+                               $this->index->refresh();
+                               $newCount = (float) $type->count();
+                               $difference = $oldCount > 0 ? abs( $oldCount - 
$newCount ) / $oldCount : 0;
+                               if ( $difference > $acceptableCountDeviation ) {
+                                       $this->output( "Not close enough!  
old=$oldCount new=$newCount difference=$difference\n" );
+                                       $this->error( 'Failed to load index - 
counts not close enough.  ' .
+                                               "old=$oldCount new=$newCount 
difference=$difference.  " .
+                                               'Check for warnings above.', 1 
);
+                               }
                        }
                        $this->output( "done\n" );
                } else {
-                       $this->reindexInternal( 1, 1, $chunkSize, 
$retryAttempts );
+                       foreach ( $this->types as $i => $type ) {
+                               $oldType = $this->oldTypes[$i];
+                               $this->reindexInternal( $type, $oldType, 1, 1, 
$chunkSize, $retryAttempts );
+                       }
                }
 
                // Revert settings changed just for reindexing
@@ -237,7 +246,7 @@
                }
        }
 
-       private function reindexInternal( $children, $childNumber, $chunkSize, 
$retryAttempts ) {
+       private function reindexInternal( Type $type, Type $oldType, $children, 
$childNumber, $chunkSize, $retryAttempts ) {
                $filter = null;
                $messagePrefix = "";
                if ( $childNumber === 1 && $children === 1 ) {
@@ -254,7 +263,7 @@
                                'lang' => 'groovy'
                        ) );
                }
-               $properties = 
$this->mappingConfig[$this->oldType->getName()]['properties'];
+               $properties = 
$this->mappingConfig[$oldType->getName()]['properties'];
                try {
                        $query = new Query();
                        $query->setFields( array( '_id', '_source' ) );
@@ -263,7 +272,7 @@
                        }
 
                        // Note here we dump from the current index (using the alias) so we can use Connection::getPageType
-                       $result = $this->oldType
+                       $result = $oldType
                                ->search( $query, array(
                                        'search_type' => 'scan',
                                        'scroll' => '1h',
@@ -306,8 +315,8 @@
                                }
                                wfProfileOut( __METHOD__ . '::packageDocs' );
                                $this->withRetry( $retryAttempts, 
$messagePrefix, 'retrying as singles',
-                                       function() use ( $self, $messagePrefix, 
$documents ) {
-                                               $self->sendDocuments( 
$messagePrefix, $documents );
+                                       function() use ( $self, $type, 
$messagePrefix, $documents ) {
+                                               $self->sendDocuments( $type, 
$messagePrefix, $documents );
                                        } );
                                $completed += $result->count();
                                $rate = round( $completed / ( microtime( true ) 
- $operationStartTime ) );
@@ -379,16 +388,16 @@
                }
        }
 
-       private function sendDocuments( $messagePrefix, $documents ) {
+       private function sendDocuments( Type $type, $messagePrefix, $documents 
) {
                try {
-                       $this->type->addDocuments( $documents );
+                       $type->addDocuments( $documents );
                } catch ( ExceptionInterface $e ) {
-                       $type = get_class( $e );
+                       $errorType = get_class( $e );
                        $message = ElasticsearchIntermediary::extractMessage( 
$e );
-                       $this->outputIndented( $messagePrefix . "Error adding 
documents in bulk.  Retrying as singles.  Error type is '$type' and message is: 
 $message" );
+                       $this->outputIndented( $messagePrefix . "Error adding 
documents in bulk.  Retrying as singles.  Error type is '$errorType' and 
message is:  $message" );
                        foreach ( $documents as $document ) {
                                // Continue using the bulk api because we're 
used to it.
-                               $this->type->addDocuments( array( $document ) );
+                               $type->addDocuments( array( $document ) );
                        }
                }
        }
diff --git a/includes/Maintenance/Validators/IndexAllAliasValidator.php b/includes/Maintenance/Validators/IndexAllAliasValidator.php
index 3e7b310..369c77f 100644
--- a/includes/Maintenance/Validators/IndexAllAliasValidator.php
+++ b/includes/Maintenance/Validators/IndexAllAliasValidator.php
@@ -11,6 +11,14 @@
         */
        protected $shouldRemovePrefix;
 
+       /**
+        * @param Client $client
+        * @param string $aliasName
+        * @param string $specificIndexName
+        * @param bool $startOver
+        * @param string $type
+        * @param Maintenance $out
+        */
        public function __construct( Client $client, $aliasName, 
$specificIndexName, $startOver, $type, Maintenance $out = null ) {
                parent::__construct( $client, $aliasName, $specificIndexName, 
$startOver, $out );
                $this->shouldRemovePrefix = $type;
diff --git a/maintenance/updateOneSearchIndexConfig.php b/maintenance/updateOneSearchIndexConfig.php
index 9db9171..43320ab 100644
--- a/maintenance/updateOneSearchIndexConfig.php
+++ b/maintenance/updateOneSearchIndexConfig.php
@@ -346,8 +346,8 @@
                $reindexer = new Reindexer(
                        $this->getIndex(),
                        Connection::getSingleton(),
-                       $this->getPageType(),
-                       $this->getOldPageType(),
+                       array( $this->getPageType() ),
+                       array( $this->getOldPageType() ),
                        $this->getShardCount(),
                        $this->getReplicaCount(),
                        $wgCirrusSearchMaintenanceTimeout,

-- 
To view, visit https://gerrit.wikimedia.org/r/183269
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I044594e060cebf52890aa0b45a99cd625af142b2
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Matthias Mullie <mmul...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to