Manybubbles has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/81866


Change subject: Carefully replace aliases to prevent duplicates.
......................................................................

Carefully replace aliases to prevent duplicates.

This prevents duplicate results when searching across both content and
non-content namespaces during a reindex.

Bug 53484

Change-Id: Ieb897fde4963add14f861f1544d565a67fdaa18e
---
M updateOneSearchIndexConfig.php
1 file changed, 37 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/66/81866/1

diff --git a/updateOneSearchIndexConfig.php b/updateOneSearchIndexConfig.php
index b209721..82aeac2 100644
--- a/updateOneSearchIndexConfig.php
+++ b/updateOneSearchIndexConfig.php
@@ -40,6 +40,10 @@
        private $indent;
        private $returnCode = 0;
 
+       // Names of any old indices that must be removed during the alias
+       // maintenance steps, or false if there are none to remove.
+       private $removeIndecies = false;
+
        public function __construct() {
                parent::__construct();
                $this->addDescription( "Update the configuration or contents of 
one search index." );
@@ -244,10 +248,13 @@
 
        private function validateAlias() {
                $this->output( $this->indent . "Validating aliases...\n" );
-               // Validate the all alias first because the old index can be 
removed as a side effect of correcting
-               // the specific alias.  This way the all alias is always 
pointing to at least one useful index.
-               $this->validateAllAlias();
+               // Validate the specific alias first since that can cause reindexing
+               // and we want the all alias to stay with the old index during reindexing.
                $this->validateSpecificAlias();
+               $this->validateAllAlias();
+               // Note that at this point both the old and the new index can 
have the all
+               // alias but this should be for a very short time.  Like, under 
a second.
+               $this->removeOldIndeciesIfRequired();
        }
 
        /**
@@ -282,11 +289,7 @@
                        $this->output( $this->indent . "\tSwapping alias...");
                        $this->getIndex()->addAlias( $this->getIndexTypeName(), 
true );
                        $this->output( "done\n" );
-                       $this->output( $this->indent . "\tRemoving old 
index..." );
-                       foreach ( $otherIndeciesWithAlias as $otherIndex ) {
-                               CirrusSearchConnection::getClient()->getIndex( 
$otherIndex )->delete();
-                       }
-                       $this->output( "done\n" );
+                       $this->removeIndecies = $otherIndeciesWithAlias;
                        return;
                }
                $this->output( "cannot correct!\n" );
@@ -300,18 +303,42 @@
 
        public function validateAllAlias() {
                $this->output( $this->indent . "\tValidating all alias..." );
+               $allAliasName = CirrusSearchConnection::getIndexName();
                foreach ( CirrusSearchConnection::getClient()->getStatus()
-                               ->getIndicesWithAlias( 
CirrusSearchConnection::getIndexName() ) as $index ) {
+                               ->getIndicesWithAlias( $allAliasName ) as 
$index ) {
                        if( $index->getName() === $this->getSpecificIndexName() 
) {
                                $this->output( "ok\n" );
                                return;
                        }
                }
                $this->output( "alias not already assigned to this index..." );
-               $this->getIndex()->addAlias( 
CirrusSearchConnection::getIndexName(), false );
+               // We'll remove the all alias from the indices that we're about to delete while
+               // we add it to this index.  Elastica doesn't support this well so we have to
+               // build the request to Elasticsearch ourselves.
+               $data = array(
+                       'action' => array(
+                               array( 'add' => array( 'index' => 
$this->getSpecificIndexName(), 'alias' => $allAliasName ) )
+                       )
+               );
+               if ( $this->removeIndecies ) {
+                       foreach ( $this->removeIndecies as $oldIndex ) {
+                               $data['action'][] = array( 'remove' => array( 
'index' => $oldIndex, 'alias' => $allAliasName ) );
+                       }
+               }
+               CirrusSearchConnection::getClient()->request( '_aliases', 
\Elastica\Request::POST, $data );
                $this->output( "corrected\n" );
        }
 
+       public function removeOldIndeciesIfRequired() {
+               if ( $this->removeIndecies ) {
+                       $this->output( $this->indent . "\tRemoving old 
indecies..." );
+                       foreach ( $this->removeIndecies as $oldIndex ) {
+                               CirrusSearchConnection::getClient()->getIndex( 
$oldIndex )->delete();
+                       }
+                       $this->output( "done\n" );
+               }
+       }
+
        /**
         * Rebuild the index by pulling everything out of it and putting it 
back in.  This should be faster than
         * reparsing everything.

-- 
To view, visit https://gerrit.wikimedia.org/r/81866
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ieb897fde4963add14f861f1544d565a67fdaa18e
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <never...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to