jenkins-bot has submitted this change and it was merged.

Change subject: Allow multiple types in Reindexer
......................................................................
Allow multiple types in Reindexer Change-Id: I044594e060cebf52890aa0b45a99cd625af142b2 --- M includes/Maintenance/ConfigUtils.php M includes/Maintenance/Reindexer.php M includes/Maintenance/Validators/IndexAllAliasValidator.php M maintenance/updateOneSearchIndexConfig.php 4 files changed, 50 insertions(+), 33 deletions(-) Approvals: Chad: Looks good to me, approved jenkins-bot: Verified diff --git a/includes/Maintenance/ConfigUtils.php b/includes/Maintenance/ConfigUtils.php index dfc81fe..014cdc2 100644 --- a/includes/Maintenance/ConfigUtils.php +++ b/includes/Maintenance/ConfigUtils.php @@ -98,7 +98,7 @@ } else { $identifier = 'first'; } - $this->output( "${typeName}_${identifier}\n "); + $this->output( "${typeName}_${identifier}\n"); return $identifier; } return $option; diff --git a/includes/Maintenance/Reindexer.php b/includes/Maintenance/Reindexer.php index 6779426..5b4de27 100644 --- a/includes/Maintenance/Reindexer.php +++ b/includes/Maintenance/Reindexer.php @@ -47,14 +47,14 @@ private $specificIndexName; /** - * @var Type + * @var Type[] */ - private $type; + private $types; /** - * @var Type + * @var Type[] */ - private $oldType; + private $oldTypes; /** * @var int @@ -94,8 +94,8 @@ /** * @param Index $index * @param \ElasticaConnection $connection - * @param Type $type - * @param Type $oldType + * @param Type[] $types + * @param Type[] $oldTypes * @param int $shardCount * @param string $replicaCount * @param int $connectionTimeout @@ -103,14 +103,14 @@ * @param array $mappingConfig * @param Maintenance $out */ - public function __construct( Index $index, \ElasticaConnection $connection, Type $type, Type $oldType, $shardCount, $replicaCount, $connectionTimeout, array $mergeSettings, array $mappingConfig, Maintenance $out = null ) { + public function __construct( Index $index, \ElasticaConnection $connection, array $types, array $oldTypes, $shardCount, $replicaCount, $connectionTimeout, array $mergeSettings, array $mappingConfig, Maintenance $out = null ) 
{ // @todo: this constructor has too many arguments - refactor! $this->index = $index; $this->client = $this->index->getClient(); $this->specificIndexName = $this->index->getName(); $this->connection = $connection; - $this->type = $type; - $this->oldType = $oldType; + $this->types = $types; + $this->oldTypes = $oldTypes; $this->shardCount = $shardCount; $this->replicaCount = $replicaCount; $this->connectionTimeout = $connectionTimeout; @@ -150,7 +150,10 @@ switch ( $forkResult ) { case 'child': - $this->reindexInternal( $processes, $fork->getChildNumber(), $chunkSize, $retryAttempts ); + foreach ( $this->types as $i => $type ) { + $oldType = $this->oldTypes[$i]; + $this->reindexInternal( $type, $oldType, $processes, $fork->getChildNumber(), $chunkSize, $retryAttempts ); + } die( 0 ); case 'done': break; @@ -161,19 +164,25 @@ $this->outputIndented( "Verifying counts..." ); // We can't verify counts are exactly equal because they won't be - we still push updates into // the old index while reindexing the new one. - $oldCount = (float) $this->oldType->count(); - $this->index->refresh(); - $newCount = (float) $this->type->count(); - $difference = $oldCount > 0 ? abs( $oldCount - $newCount ) / $oldCount : 0; - if ( $difference > $acceptableCountDeviation ) { - $this->output( "Not close enough! old=$oldCount new=$newCount difference=$difference\n" ); - $this->error( 'Failed to load index - counts not close enough. ' . - "old=$oldCount new=$newCount difference=$difference. " . - 'Check for warnings above.', 1 ); + foreach ( $this->types as $i => $type ) { + $oldType = $this->oldTypes[$i]; + $oldCount = (float) $oldType->count(); + $this->index->refresh(); + $newCount = (float) $type->count(); + $difference = $oldCount > 0 ? abs( $oldCount - $newCount ) / $oldCount : 0; + if ( $difference > $acceptableCountDeviation ) { + $this->output( "Not close enough! 
old=$oldCount new=$newCount difference=$difference\n" ); + $this->error( 'Failed to load index - counts not close enough. ' . + "old=$oldCount new=$newCount difference=$difference. " . + 'Check for warnings above.', 1 ); + } } $this->output( "done\n" ); } else { - $this->reindexInternal( 1, 1, $chunkSize, $retryAttempts ); + foreach ( $this->types as $i => $type ) { + $oldType = $this->oldTypes[$i]; + $this->reindexInternal( $type, $oldType, 1, 1, $chunkSize, $retryAttempts ); + } } // Revert settings changed just for reindexing @@ -237,7 +246,7 @@ } } - private function reindexInternal( $children, $childNumber, $chunkSize, $retryAttempts ) { + private function reindexInternal( Type $type, Type $oldType, $children, $childNumber, $chunkSize, $retryAttempts ) { $filter = null; $messagePrefix = ""; if ( $childNumber === 1 && $children === 1 ) { @@ -254,7 +263,7 @@ 'lang' => 'groovy' ) ); } - $properties = $this->mappingConfig[$this->oldType->getName()]['properties']; + $properties = $this->mappingConfig[$oldType->getName()]['properties']; try { $query = new Query(); $query->setFields( array( '_id', '_source' ) ); @@ -263,7 +272,7 @@ } // Note here we dump from the current index (using the alias) so we can use Connection::getPageType - $result = $this->oldType + $result = $oldType ->search( $query, array( 'search_type' => 'scan', 'scroll' => '1h', @@ -302,8 +311,8 @@ $result->next(); } $this->withRetry( $retryAttempts, $messagePrefix, 'retrying as singles', - function() use ( $self, $messagePrefix, $documents ) { - $self->sendDocuments( $messagePrefix, $documents ); + function() use ( $self, $type, $messagePrefix, $documents ) { + $self->sendDocuments( $type, $messagePrefix, $documents ); } ); $completed += $result->count(); $rate = round( $completed / ( microtime( true ) - $operationStartTime ) ); @@ -378,16 +387,16 @@ /** * This is really private. 
*/ - public function sendDocuments( $messagePrefix, $documents ) { + public function sendDocuments( Type $type, $messagePrefix, $documents ) { try { - $this->type->addDocuments( $documents ); + $type->addDocuments( $documents ); } catch ( ExceptionInterface $e ) { - $type = get_class( $e ); + $errorType = get_class( $e ); $message = ElasticsearchIntermediary::extractMessage( $e ); - $this->outputIndented( $messagePrefix . "Error adding documents in bulk. Retrying as singles. Error type is '$type' and message is: $message" ); + $this->outputIndented( $messagePrefix . "Error adding documents in bulk. Retrying as singles. Error type is '$errorType' and message is: $message" ); foreach ( $documents as $document ) { // Continue using the bulk api because we're used to it. - $this->type->addDocuments( array( $document ) ); + $type->addDocuments( array( $document ) ); } } } diff --git a/includes/Maintenance/Validators/IndexAllAliasValidator.php b/includes/Maintenance/Validators/IndexAllAliasValidator.php index 251b704..7b369bd 100644 --- a/includes/Maintenance/Validators/IndexAllAliasValidator.php +++ b/includes/Maintenance/Validators/IndexAllAliasValidator.php @@ -11,6 +11,14 @@ */ protected $shouldRemovePrefix; + /** + * @param Client $client + * @param string $aliasName + * @param string $specificIndexName + * @param bool $startOver + * @param string $type + * @param Maintenance $out + */ public function __construct( Client $client, $aliasName, $specificIndexName, $startOver, $type, Maintenance $out = null ) { parent::__construct( $client, $aliasName, $specificIndexName, $startOver, $out ); $this->shouldRemovePrefix = $type; diff --git a/maintenance/updateOneSearchIndexConfig.php b/maintenance/updateOneSearchIndexConfig.php index f9ed3f8..fe86a77 100644 --- a/maintenance/updateOneSearchIndexConfig.php +++ b/maintenance/updateOneSearchIndexConfig.php @@ -349,8 +349,8 @@ $reindexer = new Reindexer( $this->getIndex(), Connection::getSingleton(), - $this->getPageType(), - 
$this->getOldPageType(), + array( $this->getPageType() ), + array( $this->getOldPageType() ), $this->getShardCount(), $this->getReplicaCount(), $wgCirrusSearchMaintenanceTimeout, -- To view, visit https://gerrit.wikimedia.org/r/183269 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I044594e060cebf52890aa0b45a99cd625af142b2 Gerrit-PatchSet: 7 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: Matthias Mullie <mmul...@wikimedia.org> Gerrit-Reviewer: Chad <ch...@wikimedia.org> Gerrit-Reviewer: Manybubbles <never...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits