Manybubbles has uploaded a new change for review. https://gerrit.wikimedia.org/r/117317
Change subject: [WIP] Drop 0.90 compatibility ...................................................................... [WIP] Drop 0.90 compatibility No need for funky 1.0 backwards compatibility stuff with script fields. Replaced with _source filtering which is way easier to work with. Elastica upgrade required a few changed because it made the settings results simpler. Change-Id: I08e1c2f30d11cab0495e7faa860940440ef1ad3c --- M includes/ElasticsearchIntermediary.php M includes/Result.php M includes/ResultsType.php M includes/Searcher.php M maintenance/updateOneSearchIndexConfig.php 5 files changed, 29 insertions(+), 117 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/17/117317/1 diff --git a/includes/ElasticsearchIntermediary.php b/includes/ElasticsearchIntermediary.php index e7b883c..8802521 100644 --- a/includes/ElasticsearchIntermediary.php +++ b/includes/ElasticsearchIntermediary.php @@ -115,28 +115,6 @@ } /** - * Unwrap a result that we expect to be a single value. - * @param mixed $data from Elastica result - * @return mixed the single result - */ - public static function singleValue( $result, $name ) { - $data = $result->__get( $name ); - if ( $data === null ) { - return null; - } - // Elasticsearch 0.90 returns single results as, well, single results - if ( !is_array( $data ) ) { - return $data; - } - // Elasticsearch 1.0 returns them as single node arrays - $count = count( $data ); - if ( $count !== 1 ) { - wfLogWarning( "Expected a single result for $name but got $count." ); - } - return $data[ 0 ]; - } - - /** * Does this status represent an Elasticsearch parse error? * @param $status Status to check * @return boolean is this a parse error? diff --git a/includes/Result.php b/includes/Result.php index a07bf0d..9a69f06 100644 --- a/includes/Result.php +++ b/includes/Result.php @@ -46,8 +46,7 @@ global $wgCirrusSearchShowScore; $this->maybeSetInterwiki( $result, $interwikis ); - $this->mTitle = Title::makeTitle( ElasticsearchIntermediary::singleValue( $result, 'namespace' ), - ElasticsearchIntermediary::singleValue( $result, 'title' ), '', $this->interwiki ); + $this->mTitle = Title::makeTitle( $result->namespace, $result->title, '', $this->interwiki ); if ( $this->getTitle()->getNamespace() == NS_FILE ) { $this->mImage = wfFindFile( $this->mTitle ); } @@ -55,8 +54,8 @@ $data = $result->getData(); // Not all results requested a word count. Just pretend we have none if so $this->wordCount = isset( $data['text.word_count'] ) ? $data['text.word_count'] : 0; - $this->byteSize = ElasticsearchIntermediary::singleValue( $result, 'text_bytes' ); - $this->timestamp = ElasticsearchIntermediary::singleValue( $result, 'timestamp' ); + $this->byteSize = $result->text_bytes; + $this->timestamp = $result->timestamp; $this->timestamp = new MWTimestamp( $this->timestamp ); $highlights = $result->getHighlights(); // TODO remove when Elasticsearch issue 3757 is fixed diff --git a/includes/ResultsType.php b/includes/ResultsType.php index 01486bd..1b3957e 100644 --- a/includes/ResultsType.php +++ b/includes/ResultsType.php @@ -22,8 +22,11 @@ * http://www.gnu.org/copyleft/gpl.html */ interface ResultsType { - function getFields(); - function getScriptFields(); + /** + * Get the source filtering to be used loading the result. + * @return false|string|array corresonding to Elasticsearch source filtering syntax + */ + function getSourceFiltering(); function getHighlightingConfiguration(); function transformElasticsearchResult( $suggestPrefixes, $suggestSuffixes, $result, $searchContainedSyntax ); @@ -44,12 +47,8 @@ $this->matchedAnalyzer = $matchedAnalyzer; } - public function getFields() { + public function getSourceFiltering() { return array( 'namespace', 'title' ); - } - - public function getScriptFields() { - return null; } public function getHighlightingConfiguration() { @@ -80,8 +79,7 @@ $result, $searchContainedSyntax ) { $results = array(); foreach( $result->getResults() as $r ) { - $title = Title::makeTitle( ElasticsearchIntermediary::singleValue( $r, 'namespace' ), - ElasticsearchIntermediary::singleValue( $r, 'title' ) ); + $title = Title::makeTitle( $r->namespace, $r->title ); $highlights = $r->getHighlights(); // Now we have to use the highlights to figure out whether it was the title or the redirect @@ -98,7 +96,7 @@ // Instead of getting the redirect's real namespace we're going to just use the namespace // of the title. This is not great but OK given that we can't find cross namespace // redirects properly any way. - $title = Title::makeTitle( ElasticsearchIntermediary::singleValue( $r, 'namespace' ), $redirectTitle ); + $title = Title::makeTitle( $r->namespace, $redirectTitle ); } else if ( !isset( $highlights[ "title.$this->matchedAnalyzer" ] ) ) { // We're not really sure where the match came from so lets just pretend it was the title? wfDebugLog( 'CirrusSearch', "Title search result type hit a match but we can't " . @@ -114,17 +112,8 @@ } class FullTextResultsType implements ResultsType { - public function getFields() { - return array( 'id', 'title', 'namespace', - // 'redirect', was moved to a script field.... - 'timestamp', 'text_bytes', 'text.word_count' ); - } - - public function getScriptFields() { - // Works for both 1.0 and 0.90. - // TODO remove this in favor of source filtering when we remove support for 0.90. - // see http://www.elasticsearch.org/guide/en/elasticsearch/reference/1.x/search-request-source-filtering.html - return array( 'redirect' => new \Elastica\Script( '_source[ "redirect" ]' ) ); + public function getSourceFiltering() { + return array( 'id', 'title', 'namespace', 'redirect.*', 'timestamp', 'text_bytes', 'text.word_count' ); } /** @@ -211,11 +200,7 @@ return null; } - public function getFields() { + public function getSourceFiltering() { return array( 'namespace', 'namespace_text', 'title' ); - } - - public function getScriptFields() { - return null; } } diff --git a/includes/Searcher.php b/includes/Searcher.php index 30e0a9c..73c3cd5 100644 --- a/includes/Searcher.php +++ b/includes/Searcher.php @@ -695,11 +695,7 @@ } $query = new Elastica\Query(); - $query->setFields( $this->resultsType->getFields() ); - $scriptFields = $this->resultsType->getScriptFields(); - if ( $scriptFields ) { - $query->setScriptFields( $scriptFields ); - } + $query->setParam( '_source', $this->resultsType->getSourceFiltering() ); $extraIndexes = array(); if ( $this->namespaces ) { diff --git a/maintenance/updateOneSearchIndexConfig.php b/maintenance/updateOneSearchIndexConfig.php index ce7fb8d..601b745 100644 --- a/maintenance/updateOneSearchIndexConfig.php +++ b/maintenance/updateOneSearchIndexConfig.php @@ -98,11 +98,6 @@ */ private $reindexAcceptableCountDeviation; - /** - * @var bool is this Elasticsearch 0.90.x? - */ - private $elasticsearch90; - public function __construct() { parent::__construct(); $this->addDescription( "Update the configuration or contents of one search index." ); @@ -192,7 +187,7 @@ implode( ', ', $indexTypes ), 1 ); } - $this->fetchElasticsearchVersion(); + $this->checkElasticsearchVersion(); if ( $this->getOption( 'forceOpen', false ) ) { $this->getIndex()->open(); @@ -221,13 +216,18 @@ } } - private function fetchElasticsearchVersion() { + private function checkElasticsearchVersion() { $this->output( $this->indent . 'Fetching Elasticsearch version...' ); $result = Connection::getClient()->request( '' ); $result = $result->getData(); $result = $result[ 'version' ][ 'number' ]; - $this->elasticsearch90 = strpos( $result, '0.90.' ) !== false; - $this->output( ( $this->elasticsearch90 ? '' : 'after ' ) . "0.90\n" ); + $this->output( "$result..." ); + if ( strpos( $result, '1.' ) !== 0 ) { + $this->output( "Not supported!\n" ); + $this->error( "Only Elasticsearch 1.x is supported. Your version: $result.", 1 ); + } else { + $this->output( "OK\n" ); + } } private function updateVersions() { @@ -258,7 +258,7 @@ private function validateIndexSettings() { $this->output( $this->indent . "\tValidating number of shards..." ); $settings = $this->getSettings(); - $actualShardCount = $settings[ 'index' ][ 'number_of_shards' ]; + $actualShardCount = $settings[ 'number_of_shards' ]; if ( $actualShardCount == $this->getShardCount() ) { $this->output( "ok\n" ); } else { @@ -271,7 +271,7 @@ } $this->output( $this->indent . "\tValidating number of replicas..." ); - $actualReplicaCount = $settings[ 'index' ][ 'number_of_replicas' ]; + $actualReplicaCount = $settings[ 'number_of_replicas' ]; if ( $actualReplicaCount == $this->getReplicaCount() ) { $this->output( "ok\n" ); } else { @@ -286,7 +286,7 @@ $settings = $this->getSettings(); $analysisConfig = new AnalysisConfigBuilder( $this->langCode, $this->aggressiveSplitting ); $requiredAnalyzers = $analysisConfig->buildConfig(); - if ( $this->checkConfig( $settings[ 'index' ][ 'analysis' ], $requiredAnalyzers ) ) { + if ( $this->checkConfig( $settings[ 'analysis' ], $requiredAnalyzers ) ) { $this->output( "ok\n" ); } else { $this->output( "different..." ); @@ -308,23 +308,10 @@ /** * Load the settings array. You can't use this to set the settings, use $this->getIndex()->getSettings() for that. - * @return array of settings always in Elasticsearch 1.0 format + * @return array of settings */ private function getSettings() { - $settings = $this->getIndex()->getSettings()->get(); - if ( !$this->elasticsearch90 ) { // 1.0 compat - // Looks like we're already in the new format already - return $settings; - } - $to = array(); - foreach ( $settings as $key => $value ) { - $current = &$to; - foreach ( explode( '.', $key ) as $segment ) { - $current = &$current[ $segment ]; - } - $current = $value; - } - return $to; + return $this->getIndex()->getSettings()->get(); } private function validateMapping() { @@ -332,10 +319,6 @@ $requiredPageMappings = new MappingConfigBuilder( $this->prefixSearchStartsWithAny, $this->phraseUseText ); $requiredPageMappings = $requiredPageMappings->buildConfig(); - - if ( $this->elasticsearch90 ) { - $requiredPageMappings = $this->transformMappingToElasticsearch90( $requiredPageMappings ); - } if ( !$this->checkMapping( $requiredPageMappings ) ) { // TODO Conflict resolution here might leave old portions of mappings @@ -354,26 +337,6 @@ } } - private function transformMappingToElasticsearch90( $mapping ) { - foreach ( $mapping[ 'properties' ] as $name => $field ) { - if ( array_key_exists( 'fields', $field ) ) { - // Move the main field configuration to a subfield - $field[ 'fields' ][ $name ] = $field; - unset( $field[ 'fields' ][ $name ][ 'fields' ] ); - // Now clear everything from the "main" field that we've moved - $mapping[ 'properties' ][ $name ] = array( - 'type' => 'multi_field', - 'fields' => $field[ 'fields' ], - ); - } - if ( array_key_exists( 'properties', $field ) ) { - $mapping[ 'properties' ][ $name ] = $this->transformMappingToElasticsearch90( - $field ); - } - } - return $mapping; - } - /** * Check that the mapping returned from Elasticsearch is as we want it. * @param array $requiredPageMappings the mappings we want @@ -382,15 +345,6 @@ private function checkMapping( $requiredPageMappings ) { $indexName = $this->getIndex()->getName(); $actualMappings = $this->getIndex()->getMapping(); - if ( !array_key_exists( $indexName, $actualMappings ) ) { - $this->output( "nonexistent..."); - return false; - } - $actualMappings = $actualMappings[ $indexName ]; - if ( !$this->elasticsearch90 ) { // 1.0 compat - $actualMappings = $actualMappings[ 'mappings' ]; - } - $this->output( "\n" . $this->indent . "\tValidating mapping for page type..." ); if ( array_key_exists( 'page', $actualMappings ) && $this->checkConfig( $actualMappings[ 'page' ], $requiredPageMappings ) ) { -- To view, visit https://gerrit.wikimedia.org/r/117317 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I08e1c2f30d11cab0495e7faa860940440ef1ad3c Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: Manybubbles <never...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits