Manybubbles has uploaded a new change for review. https://gerrit.wikimedia.org/r/81866
Change subject: Carefully replace aliases to prevent duplicates. ...................................................................... Carefully replace aliases to prevent duplicates. This prevents duplicate results when searching across both content and non-content namespaces during a reindex. Bug 53484 Change-Id: Ieb897fde4963add14f861f1544d565a67fdaa18e --- M updateOneSearchIndexConfig.php 1 file changed, 37 insertions(+), 10 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/66/81866/1 diff --git a/updateOneSearchIndexConfig.php b/updateOneSearchIndexConfig.php index b209721..82aeac2 100644 --- a/updateOneSearchIndexConfig.php +++ b/updateOneSearchIndexConfig.php @@ -40,6 +40,10 @@ private $indent; private $returnCode = 0; + // Set to the names of any old indices that must be removed during the alias maintenance + // steps. + private $removeIndecies = false; + public function __construct() { parent::__construct(); $this->addDescription( "Update the configuration or contents of one search index." ); @@ -244,10 +248,13 @@ private function validateAlias() { $this->output( $this->indent . "Validating aliases...\n" ); - // Validate the all alias first because the old index can be removed as a side effect of correcting - // the specific alias. This way the all alias is always pointing to at least one useful index. - $this->validateAllAlias(); + // Validate the specific alias first since that can cause reindexing + // and we want the all index to stay with the old index during reindexing $this->validateSpecificAlias(); + $this->validateAllAlias(); + // Note that at this point both the old and the new index can have the all + // alias but this should be for a very short time. Like, under a second. + $this->removeOldIndeciesIfRequired(); } /** @@ -282,11 +289,7 @@ $this->output( $this->indent . 
"\tSwapping alias..."); $this->getIndex()->addAlias( $this->getIndexTypeName(), true ); $this->output( "done\n" ); - $this->output( $this->indent . "\tRemoving old index..." ); - foreach ( $otherIndeciesWithAlias as $otherIndex ) { - CirrusSearchConnection::getClient()->getIndex( $otherIndex )->delete(); - } - $this->output( "done\n" ); + $this->removeIndecies = $otherIndeciesWithAlias; return; } $this->output( "cannot correct!\n" ); @@ -300,18 +303,42 @@ public function validateAllAlias() { $this->output( $this->indent . "\tValidating all alias..." ); + $allAliasName = CirrusSearchConnection::getIndexName(); foreach ( CirrusSearchConnection::getClient()->getStatus() - ->getIndicesWithAlias( CirrusSearchConnection::getIndexName() ) as $index ) { + ->getIndicesWithAlias( $allAliasName ) as $index ) { if( $index->getName() === $this->getSpecificIndexName() ) { $this->output( "ok\n" ); return; } } $this->output( "alias not already assigned to this index..." ); - $this->getIndex()->addAlias( CirrusSearchConnection::getIndexName(), false ); + // We'll remove the all alias from the indecies that we're about to delete while + // we add it to this index. Elastica doesn't support this well so we have to + // build the request to Elasticsearch ourselves. + $data = array( + 'action' => array( + array( 'add' => array( 'index' => $this->getSpecificIndexName(), 'alias' => $allAliasName ) ) + ) + ); + if ( $this->removeIndecies ) { + foreach ( $this->removeIndecies as $oldIndex ) { + $data['action'][] = array( 'remove' => array( 'index' => $oldIndex, 'alias' => $allAliasName ) ); + } + } + CirrusSearchConnection::getClient()->request( '_aliases', \Elastica\Request::POST, $data ); $this->output( "corrected\n" ); } + public function removeOldIndeciesIfRequired() { + if ( $this->removeIndecies ) { + $this->output( $this->indent . "\tRemoving old indecies..." 
); + foreach ( $this->removeIndecies as $oldIndex ) { + CirrusSearchConnection::getClient()->getIndex( $oldIndex )->delete(); + } + $this->output( "done\n" ); + } + } + /** * Rebuild the index by pulling everything out of it and putting it back in. This should be faster than * reparsing everything. -- To view, visit https://gerrit.wikimedia.org/r/81866 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ieb897fde4963add14f861f1544d565a67fdaa18e Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: Manybubbles <never...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits