Manybubbles has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/117317

Change subject: [WIP] Drop 0.90 compatibility
......................................................................

[WIP] Drop 0.90 compatibility

No need for funky 1.0 backwards compatibility stuff with script fields.
Replaced with _source filtering which is way easier to work with.

Elastica upgrade required a few changed because it made the settings results
simpler.

Change-Id: I08e1c2f30d11cab0495e7faa860940440ef1ad3c
---
M includes/ElasticsearchIntermediary.php
M includes/Result.php
M includes/ResultsType.php
M includes/Searcher.php
M maintenance/updateOneSearchIndexConfig.php
5 files changed, 29 insertions(+), 117 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/17/117317/1

diff --git a/includes/ElasticsearchIntermediary.php 
b/includes/ElasticsearchIntermediary.php
index e7b883c..8802521 100644
--- a/includes/ElasticsearchIntermediary.php
+++ b/includes/ElasticsearchIntermediary.php
@@ -115,28 +115,6 @@
        }
 
        /**
-        * Unwrap a result that we expect to be a single value.
-        * @param mixed $data from Elastica result
-        * @return mixed the single result
-        */
-       public static function singleValue( $result, $name ) {
-               $data = $result->__get( $name );
-               if ( $data === null ) {
-                       return null;
-               }
-               // Elasticsearch 0.90 returns single results as, well, single 
results
-               if ( !is_array( $data ) ) {
-                       return $data;
-               }
-               // Elasticsearch 1.0 returns them as single node arrays
-               $count = count( $data );
-               if ( $count !== 1 ) {
-                       wfLogWarning( "Expected a single result for $name but 
got $count." );
-               }
-               return $data[ 0 ];
-       }
-
-       /**
         * Does this status represent an Elasticsearch parse error?
         * @param $status Status to check
         * @return boolean is this a parse error?
diff --git a/includes/Result.php b/includes/Result.php
index a07bf0d..9a69f06 100644
--- a/includes/Result.php
+++ b/includes/Result.php
@@ -46,8 +46,7 @@
                global $wgCirrusSearchShowScore;
 
                $this->maybeSetInterwiki( $result, $interwikis );
-               $this->mTitle = Title::makeTitle( 
ElasticsearchIntermediary::singleValue( $result, 'namespace' ),
-                       ElasticsearchIntermediary::singleValue( $result, 
'title' ), '', $this->interwiki );
+               $this->mTitle = Title::makeTitle( $result->namespace, 
$result->title, '', $this->interwiki );
                if ( $this->getTitle()->getNamespace() == NS_FILE ) {
                        $this->mImage = wfFindFile( $this->mTitle );
                }
@@ -55,8 +54,8 @@
                $data = $result->getData();
                // Not all results requested a word count. Just pretend we have 
none if so
                $this->wordCount = isset( $data['text.word_count'] ) ? 
$data['text.word_count'] : 0;
-               $this->byteSize = ElasticsearchIntermediary::singleValue( 
$result, 'text_bytes' );
-               $this->timestamp = ElasticsearchIntermediary::singleValue( 
$result, 'timestamp' );
+               $this->byteSize = $result->text_bytes;
+               $this->timestamp = $result->timestamp;
                $this->timestamp = new MWTimestamp( $this->timestamp );
                $highlights = $result->getHighlights();
                // TODO remove when Elasticsearch issue 3757 is fixed
diff --git a/includes/ResultsType.php b/includes/ResultsType.php
index 01486bd..1b3957e 100644
--- a/includes/ResultsType.php
+++ b/includes/ResultsType.php
@@ -22,8 +22,11 @@
  * http://www.gnu.org/copyleft/gpl.html
  */
 interface ResultsType {
-       function getFields();
-       function getScriptFields();
+       /**
+        * Get the source filtering to be used loading the result.
+        * @return false|string|array corresonding to Elasticsearch source 
filtering syntax
+        */
+       function getSourceFiltering();
        function getHighlightingConfiguration();
        function transformElasticsearchResult( $suggestPrefixes, 
$suggestSuffixes,
                $result, $searchContainedSyntax );
@@ -44,12 +47,8 @@
                $this->matchedAnalyzer = $matchedAnalyzer;
        }
 
-       public function getFields() {
+       public function getSourceFiltering() {
                return array( 'namespace', 'title' );
-       }
-
-       public function getScriptFields() {
-               return null;
        }
 
        public function getHighlightingConfiguration() {
@@ -80,8 +79,7 @@
                        $result, $searchContainedSyntax ) {
                $results = array();
                foreach( $result->getResults() as $r ) {
-                       $title = Title::makeTitle( 
ElasticsearchIntermediary::singleValue( $r, 'namespace' ),
-                               ElasticsearchIntermediary::singleValue( $r, 
'title' ) );
+                       $title = Title::makeTitle( $r->namespace, $r->title );
                        $highlights = $r->getHighlights();
 
                        // Now we have to use the highlights to figure out 
whether it was the title or the redirect
@@ -98,7 +96,7 @@
                                // Instead of getting the redirect's real 
namespace we're going to just use the namespace
                                // of the title.  This is not great but OK 
given that we can't find cross namespace
                                // redirects properly any way.
-                               $title = Title::makeTitle( 
ElasticsearchIntermediary::singleValue( $r, 'namespace' ), $redirectTitle );
+                               $title = Title::makeTitle( $r->namespace, 
$redirectTitle );
                        } else if ( !isset( $highlights[ 
"title.$this->matchedAnalyzer" ] ) ) {
                                // We're not really sure where the match came 
from so lets just pretend it was the title?
                                wfDebugLog( 'CirrusSearch', "Title search 
result type hit a match but we can't " .
@@ -114,17 +112,8 @@
 }
 
 class FullTextResultsType implements ResultsType {
-       public function getFields() {
-               return array( 'id', 'title', 'namespace',
-                       // 'redirect', was moved to a script field....
-                       'timestamp', 'text_bytes', 'text.word_count' );
-       }
-
-       public function getScriptFields() {
-               // Works for both 1.0 and 0.90.
-               // TODO remove this in favor of source filtering when we remove 
support for 0.90.
-               // see 
http://www.elasticsearch.org/guide/en/elasticsearch/reference/1.x/search-request-source-filtering.html
-               return array( 'redirect' => new \Elastica\Script( '_source[ 
"redirect" ]' ) );
+       public function getSourceFiltering() {
+               return array( 'id', 'title', 'namespace', 'redirect.*', 
'timestamp', 'text_bytes', 'text.word_count' );
        }
 
        /**
@@ -211,11 +200,7 @@
                return null;
        }
 
-       public function getFields() {
+       public function getSourceFiltering() {
                return array( 'namespace', 'namespace_text', 'title' );
-       }
-
-       public function getScriptFields() {
-               return null;
        }
 }
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 30e0a9c..73c3cd5 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -695,11 +695,7 @@
                }
 
                $query = new Elastica\Query();
-               $query->setFields( $this->resultsType->getFields() );
-               $scriptFields = $this->resultsType->getScriptFields();
-               if ( $scriptFields ) {
-                       $query->setScriptFields( $scriptFields );
-               }
+               $query->setParam( '_source', 
$this->resultsType->getSourceFiltering() );
 
                $extraIndexes = array();
                if ( $this->namespaces ) {
diff --git a/maintenance/updateOneSearchIndexConfig.php 
b/maintenance/updateOneSearchIndexConfig.php
index ce7fb8d..601b745 100644
--- a/maintenance/updateOneSearchIndexConfig.php
+++ b/maintenance/updateOneSearchIndexConfig.php
@@ -98,11 +98,6 @@
         */
        private $reindexAcceptableCountDeviation;
 
-       /**
-        * @var bool is this Elasticsearch 0.90.x?
-        */
-       private $elasticsearch90;
-
        public function __construct() {
                parent::__construct();
                $this->addDescription( "Update the configuration or contents of 
one search index." );
@@ -192,7 +187,7 @@
                                        implode( ', ', $indexTypes ), 1 );
                        }
 
-                       $this->fetchElasticsearchVersion();
+                       $this->checkElasticsearchVersion();
 
                        if ( $this->getOption( 'forceOpen', false ) ) {
                                $this->getIndex()->open();
@@ -221,13 +216,18 @@
                }
        }
 
-       private function fetchElasticsearchVersion() {
+       private function checkElasticsearchVersion() {
                $this->output( $this->indent . 'Fetching Elasticsearch 
version...' );
                $result = Connection::getClient()->request( '' );
                $result = $result->getData();
                $result = $result[ 'version' ][ 'number' ];
-               $this->elasticsearch90 = strpos( $result, '0.90.' ) !== false;
-               $this->output( ( $this->elasticsearch90 ? '' : 'after ' ) . 
"0.90\n" );
+               $this->output( "$result..." );
+               if ( strpos( $result, '1.' ) !== 0 ) {
+                       $this->output( "Not supported!\n" );
+                       $this->error( "Only Elasticsearch 1.x is supported.  
Your version: $result.", 1 );
+               } else {
+                       $this->output( "OK\n" );
+               }
        }
 
        private function updateVersions() {
@@ -258,7 +258,7 @@
        private function validateIndexSettings() {
                $this->output( $this->indent . "\tValidating number of 
shards..." );
                $settings = $this->getSettings();
-               $actualShardCount = $settings[ 'index' ][ 'number_of_shards' ];
+               $actualShardCount = $settings[ 'number_of_shards' ];
                if ( $actualShardCount == $this->getShardCount() ) {
                        $this->output( "ok\n" );
                } else {
@@ -271,7 +271,7 @@
                }
 
                $this->output( $this->indent . "\tValidating number of 
replicas..." );
-               $actualReplicaCount = $settings[ 'index' ][ 
'number_of_replicas' ];
+               $actualReplicaCount = $settings[ 'number_of_replicas' ];
                if ( $actualReplicaCount == $this->getReplicaCount() ) {
                        $this->output( "ok\n" );
                } else {
@@ -286,7 +286,7 @@
                $settings = $this->getSettings();
                $analysisConfig = new AnalysisConfigBuilder( $this->langCode, 
$this->aggressiveSplitting );
                $requiredAnalyzers = $analysisConfig->buildConfig();
-               if ( $this->checkConfig( $settings[ 'index' ][ 'analysis' ], 
$requiredAnalyzers ) ) {
+               if ( $this->checkConfig( $settings[ 'analysis' ], 
$requiredAnalyzers ) ) {
                        $this->output( "ok\n" );
                } else {
                        $this->output( "different..." );
@@ -308,23 +308,10 @@
 
        /**
         * Load the settings array.  You can't use this to set the settings, 
use $this->getIndex()->getSettings() for that.
-        * @return array of settings always in Elasticsearch 1.0 format
+        * @return array of settings
         */
        private function getSettings() {
-               $settings = $this->getIndex()->getSettings()->get();
-               if ( !$this->elasticsearch90 ) { // 1.0 compat
-                       // Looks like we're already in the new format already
-                       return $settings;
-               }
-               $to = array();
-               foreach ( $settings as $key => $value ) {
-                       $current = &$to;
-                       foreach ( explode( '.', $key ) as $segment ) {
-                               $current = &$current[ $segment ];
-                       }
-                       $current = $value;
-               }
-               return $to;
+               return $this->getIndex()->getSettings()->get();
        }
 
        private function validateMapping() {
@@ -332,10 +319,6 @@
                $requiredPageMappings = new MappingConfigBuilder(
                        $this->prefixSearchStartsWithAny, $this->phraseUseText 
);
                $requiredPageMappings = $requiredPageMappings->buildConfig();
-
-               if ( $this->elasticsearch90 ) {
-                       $requiredPageMappings = 
$this->transformMappingToElasticsearch90( $requiredPageMappings );
-               }
 
                if ( !$this->checkMapping( $requiredPageMappings ) ) {
                        // TODO Conflict resolution here might leave old 
portions of mappings
@@ -354,26 +337,6 @@
                }
        }
 
-       private function transformMappingToElasticsearch90( $mapping ) {
-               foreach ( $mapping[ 'properties' ] as $name => $field ) {
-                       if ( array_key_exists( 'fields', $field ) ) {
-                               // Move the main field configuration to a 
subfield
-                               $field[ 'fields' ][ $name ] = $field;
-                               unset( $field[ 'fields' ][ $name ][ 'fields' ] 
);
-                               // Now clear everything from the "main" field 
that we've moved
-                               $mapping[ 'properties' ][ $name ] = array(
-                                       'type' => 'multi_field',
-                                       'fields' => $field[ 'fields' ],
-                               );
-                       }
-                       if ( array_key_exists( 'properties', $field ) ) {
-                               $mapping[ 'properties' ][ $name ] = 
$this->transformMappingToElasticsearch90(
-                                       $field );
-                       }
-               }
-               return $mapping;
-       }
-
        /**
         * Check that the mapping returned from Elasticsearch is as we want it.
         * @param array $requiredPageMappings the mappings we want
@@ -382,15 +345,6 @@
        private function checkMapping( $requiredPageMappings ) {
                $indexName = $this->getIndex()->getName();
                $actualMappings = $this->getIndex()->getMapping();
-               if ( !array_key_exists( $indexName, $actualMappings ) ) {
-                       $this->output( "nonexistent...");
-                       return false;
-               }
-               $actualMappings = $actualMappings[ $indexName ];
-               if ( !$this->elasticsearch90 ) { // 1.0 compat
-                       $actualMappings = $actualMappings[ 'mappings' ];
-               }
-
                $this->output( "\n" . $this->indent . "\tValidating mapping for 
page type..." );
                if ( array_key_exists( 'page', $actualMappings ) &&
                                $this->checkConfig( $actualMappings[ 'page' ], 
$requiredPageMappings ) ) {

-- 
To view, visit https://gerrit.wikimedia.org/r/117317
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I08e1c2f30d11cab0495e7faa860940440ef1ad3c
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <never...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to