jenkins-bot has submitted this change and it was merged.

Change subject: Refactor the suggester.
......................................................................


Refactor the suggester.

* Refactored some fragile design/methods
* Moved the variant fallback queries into CompletionSuggester.php
* Added unit tests

Change-Id: Iab87083841481107daf8048ddfabef3c8f284f79
---
M autoload.php
M includes/Api/Suggest.php
M includes/CirrusSearch.php
M includes/CompletionSuggester.php
M includes/ElasticsearchIntermediary.php
M includes/Search/SearchSuggestion.php
M includes/Search/SearchSuggestionSet.php
M includes/UserTesting.php
M scripts/gen-autoload.php
M tests/browser/features/suggest_api.feature
M tests/browser/features/support/hooks.rb
A tests/unit/CompletionSuggesterTest.php
M tests/unit/RescoreBuilderTest.php
M tests/unit/Search/SearchSuggestionSetTest.php
A tests/unit/TestUtils.php
M tests/unit/UserTestingTest.php
16 files changed, 571 insertions(+), 168 deletions(-)

Approvals:
  EBernhardson: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/autoload.php b/autoload.php
index ddfdd74..6d7c0b2 100644
--- a/autoload.php
+++ b/autoload.php
@@ -117,4 +117,6 @@
        'CirrusSearch\\UserTesting' => __DIR__ . '/includes/UserTesting.php',
        'CirrusSearch\\Util' => __DIR__ . '/includes/Util.php',
        'CirrusSearch\\Version' => __DIR__ . '/includes/Version.php',
+       'CirrusSearch\\Test\\HashSearchConfig' => __DIR__ . 
'/tests/unit/TestUtils.php',
+       'CirrusSearch\\Test\\DummyConnection' => __DIR__ . 
'/tests/unit/TestUtils.php',
 );
diff --git a/includes/Api/Suggest.php b/includes/Api/Suggest.php
index a455a3d..944be7c 100644
--- a/includes/Api/Suggest.php
+++ b/includes/Api/Suggest.php
@@ -40,7 +40,6 @@
                }
 
                $suggestions = $cirrus->searchSuggestions( $queryText );
-
                // Use the same cache options used by OpenSearch
                $this->getMain()->setCacheMaxAge( $this->getConfig()->get( 
'SearchSuggestCacheExpiry' ) );
                $this->getMain()->setCacheMode( 'public' );
diff --git a/includes/CirrusSearch.php b/includes/CirrusSearch.php
index c0a0bd3..7927209 100644
--- a/includes/CirrusSearch.php
+++ b/includes/CirrusSearch.php
@@ -390,50 +390,22 @@
                $suggester = new CompletionSuggester( $this->connection, 
$this->limit,
                        $config, $this->namespaces, $user, $this->indexBaseName 
);
 
-               $response = $suggester->suggest( $search );
-               $suggestions = SearchSuggestionSet::emptySuggestionSet();
+               // Not really useful, mostly for testing purpose
+               $variants = $request->getArray( 
'cirrusCompletionSuggesterVariant' );
+               if ( empty( $variants ) ) {
+                       global $wgContLang;
+                       $variants = $wgContLang->autoConvertToAllVariants( 
$search );
+               } else if ( count( $variants ) > 3 ) {
+                       // We should not allow too many variants
+                       $variants = array_slice( $variants, 0, 3 );
+               }
+
+               $response = $suggester->suggest( $search, $variants );
                if ( $response->isOK() ) {
                        // Errors will be logged, let's try the exact db match
                        $suggestions = $response->getValue();
-               }
-
-               // if the content language has variants, try to retrieve 
fallback results
-               $fallbackLimit = $this->limit - $suggestions->getSize();;
-
-               // Copied from PrefixSearch
-               // @todo: verify if this is really needed, if variants are
-               // close enough fuzzy suggestions could already cover this
-               // usecase.
-               if ( $fallbackLimit > 0 ) {
-                       global $wgContLang;
-
-                       $fallbackSearches = 
$wgContLang->autoConvertToAllVariants( $search );
-                       $fallbackSearches = array_diff( array_unique( 
$fallbackSearches ), array( $search ) );
-
-                       $suggester->setLimit( $fallbackLimit );
-                       foreach ( $fallbackSearches as $fbs ) {
-                               $fallbackResponse = $suggester->suggest( $fbs );
-                               if ( !$fallbackResponse->isOK() ) {
-                                       continue;
-                               }
-                               $pageIds = $suggestions->map( function( $sugg ) 
{
-                                       return $sugg->getSuggestedTitleID();
-                               });
-
-                               $fallbackSuggestions = 
$fallbackResponse->getValue();
-                               // Same page can be returned (fuzzy suggestions)
-                               foreach( $fallbackSuggestions->getSuggestions() 
as $s ) {
-                                       if ( !in_array ( 
$s->getSuggestedTitleID(), $pageIds ) ) {
-                                               $suggestions->addSuggestion( $s 
);
-                                       }
-                               }
-
-                               $fallbackLimit = $this->limit - 
$suggestions->getSize();
-
-                               if ( $fallbackLimit <= 0 ) {
-                                       break;
-                               }
-                       }
+               } else {
+                       $suggestions = 
SearchSuggestionSet::emptySuggestionSet();
                }
 
                // preload the titles with LinkBatch
@@ -454,21 +426,17 @@
                $rescoredResults = $rescorer->rescore( $search, 
$this->namespaces, $results, $this->limit );
 
                if( count( $rescoredResults ) > 0 ) {
-                       if ( !in_array( reset( $rescoredResults ), $results ) ) 
{
+                       $found = array_search( $rescoredResults[0], $results );
+                       if ( $found === false ) {
                                // If the first result is not in the previous 
array it
                                // means that we found a new exact match
-                               $exactTitle = Title::newFromText( reset( 
$rescoredResults ) );
-                               $exactMatch = new SearchSuggestion();
-                               $exactMatch->setText( 
$exactTitle->getPrefixedText() );
-                               $exactMatch->setSuggestedTitle( $exactTitle, 
true );
-                               $exactMatch->setScore( 0 );
-                               $suggestions->insertBestSuggestion( $exactMatch 
);
+                               $exactMatch = SearchSuggestion::fromTitle( 0, 
Title::newFromText( $rescoredResults[0] ) );
+                               $suggestions->prepend( $exactMatch );
                                $suggestions->shrink( $this->limit );
                        } else {
                                // if the first result is not the same we need 
to rescore
-                               if( reset( $rescoredResults ) != reset( 
$results ) ) {
-                                       $rescoredIndex = array_search( reset( 
$rescoredResults ), $results );
-                                       $suggestions->rescore( $rescoredIndex );
+                               if( $found > 0 ) {
+                                       $suggestions->rescore( $found );
                                }
                        }
                }
diff --git a/includes/CompletionSuggester.php b/includes/CompletionSuggester.php
index e0edd4d..1df1d2e 100644
--- a/includes/CompletionSuggester.php
+++ b/includes/CompletionSuggester.php
@@ -13,7 +13,6 @@
 use Title;
 use User;
 use Elastica\Request;
-use Elastica\Exception\ResponseException;
 
 /**
  * Performs search as you type queries using Completion Suggester.
@@ -82,10 +81,23 @@
  * some workarounds (https://github.com/elastic/elasticsearch/issues/10746).
  */
 class CompletionSuggester extends ElasticsearchIntermediary {
+       const VARIANT_EXTRA_DISCOUNT = 0.0001;
        /**
         * @var string term to search.
         */
        private $term;
+
+       /**
+        * @var string[]|null search variants
+        */
+       private $variants;
+
+       /**
+        * Currently very limited (see LIMITATIONS) and only works
+        * for geo context
+        * @var array|null context for contextualized suggestions
+        */
+       private $context;
 
        /**
         * @var integer maximum number of result
@@ -103,6 +115,11 @@
         * Specified as public because of closures. When we move to non-anicent 
PHP version, can be made protected.
         */
        public $config;
+
+       /**
+        * @var string Query type (comp_suggest_geo or comp_suggest)
+        */
+       public $queryType;
 
        /**
         * @var SearchContext
@@ -140,52 +157,16 @@
         * WARNING: experimental API
         *
         * @param string $text Search term
+        * @param string[]|null $variants Search term variants
+        * (usually issued from $wgContLang->autoConvertToAllVariants( $text ) )
         * @param array $context
         * @return Status
         */
-       public function suggest( $text, $context = null ) {
-               // Do not remove spaces at the end, the user might tell us he 
finished writing a word
-               $this->term = ltrim( $text );
+       public function suggest( $text, $variants = null, $context = null ) {
+               $this->setTermAndVariants( $text, $variants );
+               $this->context = $context;
 
-               if ( mb_strlen( $this->term ) > 
SuggestBuilder::MAX_INPUT_LENGTH ) {
-                       // Trim the query otherwise we won't find results
-                       $this->term = mb_substr( $this->term, 0, 
SuggestBuilder::MAX_INPUT_LENGTH );
-               }
-
-               $suggest = array( 'text' => $text );
-               $queryLen = mb_strlen( trim( $text ) ); // Avoid cheating with 
spaces
-               $queryType = "comp_suggest";
-
-               $profiles = $this->config->get( 
'CirrusSearchCompletionSettings' );
-               if ( $context != null && isset( $context['geo']['lat'] ) && 
isset( $context['geo']['lon'] )
-                       && is_numeric( $context['geo']['lat'] ) && is_numeric( 
$context['geo']['lon'] )
-               ) {
-                       $profiles = $this->prepareGeoContextSuggestProfiles( 
$context );
-                       $queryType = "comp_suggest_geo";
-               }
-
-               foreach ( $profiles as $name => $config ) {
-                       if ( $config['min_query_len'] > $queryLen ) {
-                               continue;
-                       }
-                       if ( isset( $config['max_query_len'] ) && $queryLen > 
$config['max_query_len'] ) {
-                               continue;
-                       }
-                       $field = $config['field'];
-                       $suggest[$name] = array(
-                               'completion' => array(
-                                       'field' => $field,
-                                       'size' => $this->limit * 
$config['fetch_limit_factor']
-                               )
-                       );
-                       if ( isset( $config['fuzzy'] ) ) {
-                               $suggest[$name]['completion']['fuzzy'] = 
$config['fuzzy'];
-                       }
-                       if ( isset( $config['context'] ) ) {
-                               $suggest[$name]['completion']['context'] = 
$config['context'];
-                       }
-               }
-
+               list( $profiles, $suggest ) = $this->buildQuery();
                $queryOptions = array();
                $queryOptions[ 'timeout' ] = $this->config->getElement( 
'CirrusSearchSearchShardTimeout', 'default' );
                $this->connection->setTimeout( $queryOptions[ 'timeout' ] );
@@ -193,7 +174,7 @@
                $index = $this->connection->getIndex( $this->indexBaseName, 
Connection::TITLE_SUGGEST_TYPE );
                $logContext = array(
                        'query' => $text,
-                       'queryType' => $queryType,
+                       'queryType' => $this->queryType,
                );
                $searcher = $this;
                $limit = $this->limit;
@@ -207,7 +188,7 @@
                                try {
                                        $result = $index->request( "_suggest", 
Request::POST, $suggest, $queryOptions );
                                        if( $result->isOk() ) {
-                                               $result = 
$searcher->postProcessSuggest( $text, $result,
+                                               $result = 
$searcher->postProcessSuggest( $result,
                                                        $profiles, $limit );
                                                return $searcher->success( 
$result );
                                        }
@@ -221,13 +202,157 @@
        }
 
        /**
+        * protected for tests
+        */
+       protected function setTermAndVariants( $term, $variants = null ) {
+               $this->term = $term;
+               if ( empty( $variants ) ) {
+                       $this->variants = null;
+                       return;
+               }
+               $variants = array_diff( array_unique( $variants ), array( $term 
) );
+               if ( empty( $variants ) ) {
+                       $this->variants = null;
+               } else {
+                       $this->variants = $variants;
+               }
+       }
+
+       /**
+        * Builds the suggest queries and profiles.
+        * Use with list( $profiles, $suggest ).
+        * @return array the profiles and suggest queries
+        */
+       protected function buildQuery() {
+               if ( mb_strlen( $this->term ) > 
SuggestBuilder::MAX_INPUT_LENGTH ) {
+                       // Trim the query otherwise we won't find results
+                       $this->term = mb_substr( $this->term, 0, 
SuggestBuilder::MAX_INPUT_LENGTH );
+               }
+
+               $queryLen = mb_strlen( trim( $this->term ) ); // Avoid cheating 
with spaces
+               $this->queryType = "comp_suggest";
+
+               $profiles = $this->config->get( 
'CirrusSearchCompletionSettings' );
+               if ( $this->context != null && isset( 
$this->context['geo']['lat'] )
+                       && isset( $this->context['geo']['lon'] ) && is_numeric( 
$this->context['geo']['lat'] )
+                       && is_numeric( $this->context['geo']['lon'] )
+               ) {
+                       $profiles = $this->prepareGeoContextSuggestProfiles();
+                       $this->queryType = "comp_suggest_geo";
+               }
+
+               $suggest = $this->buildSuggestQueries( $profiles, $this->term, 
$queryLen );
+
+               // Handle variants, update the set of profiles and suggest 
queries
+               if ( !empty( $this->variants ) ) {
+                       list( $addProfiles, $addSuggest ) = 
$this->handleVariants( $profiles, $queryLen );
+                       $profiles += $addProfiles;
+                       $suggest += $addSuggest;
+               }
+               return array( $profiles, $suggest );
+       }
+
+       /**
+        * Builds a set of suggest query by reading the list of profiles
+        * @param array $profiles
+        * @param string $query
+        * @param int $queryLen the length to use when checking 
min/max_query_len
+        * @return array a set of suggest queries ready to for elastic
+        */
+       protected function buildSuggestQueries( array $profiles, $query, 
$queryLen ) {
+               $suggest = array();
+               foreach($profiles as $name => $config) {
+                       $sugg = $this->buildSuggestQuery( $config, $query, 
$queryLen );
+                       if(!$sugg) {
+                               continue;
+                       }
+                       $suggest[$name] = $sugg;
+               }
+               return $suggest;
+       }
+
+       /**
+        * Builds a suggest query from a profile
+        * @param array $config Profile
+        * @param string $query
+        * @param int $queryLen the length to use when checking 
min/max_query_len
+        * @return array|null suggest query ready to for elastic or null
+        */
+       protected function buildSuggestQuery( array $config, $query, $queryLen 
) {
+               // Do not remove spaces at the end, the user might tell us he 
finished writing a word
+               $query = ltrim( $query );
+               if ( $config['min_query_len'] > $queryLen ) {
+                       return null;
+               }
+               if ( isset( $config['max_query_len'] ) && $queryLen > 
$config['max_query_len'] ) {
+                       return null;
+               }
+               $field = $config['field'];
+               $suggest = array(
+                       'text' => $query,
+                       'completion' => array(
+                               'field' => $field,
+                               'size' => $this->limit * 
$config['fetch_limit_factor']
+                       )
+               );
+               if ( isset( $config['fuzzy'] ) ) {
+                       $suggest['completion']['fuzzy'] = $config['fuzzy'];
+               }
+               if ( isset( $config['context'] ) ) {
+                       $suggest['completion']['context'] = $config['context'];
+               }
+               return $suggest;
+       }
+
+       /**
+        * Update the suggest queries and return additional profiles flagged 
the 'fallback' key
+        * with a discount factor = originalDiscount * 0.0001/(variantIndex+1).
+        * @param array $profiles the default profiles
+        * @param array $suggest the current set of suggest queries
+        * @param string[]|null $variants the list of variant queries
+        * @param string $search the original query
+        * @param int $queryLen the original query length
+        * @return array new variant profiles
+        */
+        protected function handleVariants( array $profiles, $queryLen ) {
+               $variantIndex = 0;
+               $allVariantProfiles = array();
+               $allSuggestions = array();
+               foreach( $this->variants as $variant ) {
+                       $variantIndex++;
+                       foreach ( $profiles as $name => $profile ) {
+                               $variantProfName = $name . '-variant-' . 
$variantIndex;
+                               $allVariantProfiles[$variantProfName] = 
$this->buildVariantProfile( $profile, 
self::VARIANT_EXTRA_DISCOUNT/$variantIndex );
+                               $allSuggestions[$variantProfName] = 
$this->buildSuggestQuery(
+                                                       
$allVariantProfiles[$variantProfName], $variant, $queryLen
+                                               );
+                       }
+               }
+               return array( $allVariantProfiles, $allSuggestions );
+       }
+
+       /**
+        * Creates a copy of $profile[$name] with a custom '-variant-SEQ' 
suffix.
+        * And applies an extra discount factor of 0.0001.
+        * The copy is added to the profiles container.
+        * @param array $profile profile to copy
+        * @param float $extraDiscount extra discount factor to rank variant 
suggestion lower.
+        */
+       protected function buildVariantProfile( array $profile, $extraDiscount 
= 0.0001 ) {
+               // mark the profile as a fallback query
+               $profile['fallback'] = true;
+               $profile['discount'] *= $extraDiscount;
+               return $profile;
+       }
+
+       /**
         * prepare the list of suggest requests used for geo context suggestions
         * This method will merge $this->config->get( 
'CirrusSearchCompletionSettings and
         * $this->config->get( 'CirrusSearchCompletionGeoContextSettings
         * @param array $context user's geo context
         * @return array of suggest request profiles
         */
-       private function prepareGeoContextSuggestProfiles( $context ) {
+       private function prepareGeoContextSuggestProfiles() {
                $profiles = array();
                foreach ( $this->config->get( 
'CirrusSearchCompletionGeoContextSettings' ) as $geoname => $geoprof ) {
                        foreach ( $this->config->get( 
'CirrusSearchCompletionSettings' ) as $sugname => $sugprof ) {
@@ -239,8 +364,8 @@
                                $profile['discount'] *= $geoprof['discount'];
                                $profile['context'] = array(
                                        'location' => array(
-                                               'lat' => $context['geo']['lat'],
-                                               'lon' => $context['geo']['lon'],
+                                               'lat' => 
$this->context['geo']['lat'],
+                                               'lon' => 
$this->context['geo']['lon'],
                                                'precision' => 
$geoprof['precision']
                                        )
                                );
@@ -261,7 +386,7 @@
         * @param int $limit Maximum suggestions to return, -1 for unlimited
         * @return SearchSuggestionSet a set of Suggestions
         */
-       protected function postProcessSuggest( $query, \Elastica\Response 
$response, $profiles, $limit = -1 ) {
+       protected function postProcessSuggest( \Elastica\Response $response, 
$profiles, $limit = -1 ) {
                $this->logContext['elasticTookMs'] = intval( 
$response->getQueryTime() * 1000 );
                $data = $response->getData();
                unset( $data['_shards'] );
@@ -283,7 +408,7 @@
                                        if ( !isset( $suggestions[$pageId] ) ||
                                                $score > 
$suggestions[$pageId]->getScore()
                                        ) {
-                                               $suggestion = new 
SearchSuggestion( null, null, $score, null, $pageId );
+                                               $suggestion = new 
SearchSuggestion( $score, null, null, $pageId );
                                                // If it's a title suggestion 
we have the text
                                                if ( $type === 
SuggestBuilder::TITLE_SUGGESTION ) {
                                                        $suggestion->setText( 
$output['text'] );
@@ -337,47 +462,39 @@
                                if ( $redirResponse->isOk() ) {
                                        $this->logContext['elasticTook2PassMs'] 
= intval( $redirResponse->getQueryTime() * 1000 );
                                        $docs = $redirResponse->getData();
-                                       $docs = $docs['docs'];
-                                       foreach ( $docs as $doc ) {
-                                               $id = $doc['_id'];
-                                               if ( !isset( 
$doc['_source']['redirect'] )
-                                                       || empty( 
$doc['_source']['redirect'] )
-                                               ) {
+                                       foreach ( $docs['docs'] as $doc ) {
+                                               if ( empty( 
$doc['_source']['redirect'] ) ) {
                                                        continue;
                                                }
-                                               $text = 
Util::chooseBestRedirect( $query, $doc['_source']['redirect'] );
-                                               $suggestions[$id]->setText( 
$text );
+                                               // We use the original query, 
we should maybe use the variant that generated this result?
+                                               $text = 
Util::chooseBestRedirect( $this->term, $doc['_source']['redirect'] );
+                                               if( !empty( 
$suggestions[$doc['_id']] ) ) {
+                                                       
$suggestions[$doc['_id']]->setText( $text );
+                                               }
                                        }
                                } else {
                                        LoggerFactory::getInstance( 
'CirrusSearch' )->warning(
                                                'Unable to fetch redirects for 
suggestion {query} with results {ids} : {error}',
-                                               array( 'query' => $query,
+                                               array( 'query' => $this->term,
                                                        'ids' => serialize( 
$missingText ),
                                                        'error' => 
$redirResponse->getError() ) );
                                }
                        } catch ( \Elastica\Exception\ExceptionInterface $e ) {
                                LoggerFactory::getInstance( 'CirrusSearch' 
)->warning(
                                        'Unable to fetch redirects for 
suggestion {query} with results {ids} : {error}',
-                                       array( 'query' => $query,
+                                       array( 'query' => $this->term,
                                                'ids' => serialize( 
$missingText ),
                                                'error' => 
$this->extractMessage( $e ) ) );
                        }
                }
 
-               $retval = array();
-               foreach ( $suggestions as $suggestion ) {
-                       if ( $suggestion->getText() === null ) {
-                               // We were unable to find a text to display
-                               // Maybe a page with redirects when we built 
the suggester index
-                               // but now without redirects?
-                               continue;
+               return new SearchSuggestionSet( array_filter(
+                       $suggestions,
+                       function ( $suggestion ) {
+                               // text should be not empty for suggestions
+                               return $suggestion->getText() != null;
                        }
-                       // Populate the SearchSuggestion object
-                       $suggestion->setSuggestedTitle( Title::makeTitle( 0, 
$suggestion->getText() ), true );
-                       $retval[] = $suggestion;
-               }
-
-               return new SearchSuggestionSet( $retval );
+               ) );
        }
 
        /**
diff --git a/includes/ElasticsearchIntermediary.php 
b/includes/ElasticsearchIntermediary.php
index 7738c82..12992e6 100644
--- a/includes/ElasticsearchIntermediary.php
+++ b/includes/ElasticsearchIntermediary.php
@@ -109,6 +109,13 @@
        }
 
        /**
+        * Unit tests only
+        */
+       public static function resetExecutionId() {
+               self::$executionId = null;
+       }
+
+       /**
         * Summarizes all the requests made in this process and reports
         * them along with the test they belong to.
         * Only public due to php 5.3 not having access from closures
diff --git a/includes/Search/SearchSuggestion.php 
b/includes/Search/SearchSuggestion.php
index ec46971..e39a982 100644
--- a/includes/Search/SearchSuggestion.php
+++ b/includes/Search/SearchSuggestion.php
@@ -58,17 +58,17 @@
 
        /**
         * Construct a new suggestion
+        * @param float $score the suggestion score
         * @param string $text|null the suggestion text
-        * @param string $url|null the suggestion URL
-        * @param float|0 the suggestion score
         * @param Title|null $suggestedTitle the suggested title
-        * @param int|null the suggested title ID
+        * @param int|null $suggestedTitleID the suggested title ID
         */
-       public function __construct( $text = null, $url = null, $score = 0, 
Title $suggestedTitle = null, $suggestedTitleID = null ) {
-               $this->text = $text;
-               $this->url = $url;
+       public function __construct( $score, $text = null, Title 
$suggestedTitle = null, $suggestedTitleID = null ) {
                $this->score = $score;
-               $this->suggestedTitle = $suggestedTitle;
+               $this->text = $text;
+               if ( $suggestedTitle ) {
+                       $this->setSuggestedTitle( $suggestedTitle  );
+               }
                $this->suggestedTitleID = $suggestedTitleID;
        }
 
@@ -81,11 +81,15 @@
        }
 
        /**
-        * Set the suggestion text
+        * Set the suggestion text.
         * @param string $text
+        * @param bool $setTitle Should we also update the title?
         */
-       public function setText( $text ) {
+       public function setText( $text, $setTitle = true ) {
                $this->text = $text;
+               if ( $setTitle && $text ) {
+                       $this->setSuggestedTitle( Title::makeTitle( 0, $text ) 
);
+               }
        }
 
        /**
@@ -102,9 +106,9 @@
         * @param Title|null $title
         * @param boolean|false $generateURL set to true to generate the URL 
based on this Title
         */
-       public function setSuggestedTitle( Title $title = null, $generateURL = 
false ) {
+       public function setSuggestedTitle( Title $title = null ) {
                $this->suggestedTitle = $title;
-               if ( $title !== null && $generateURL ) {
+               if ( $title !== null ) {
                        $this->url = wfExpandUrl( $title->getFullURL(), 
PROTO_CURRENT );
                }
        }
@@ -158,4 +162,30 @@
        public function setURL( $url ) {
                $this->url = $url;
        }
+
+       /**
+        * Create suggestion from Title
+        * @param float $score Suggestions score
+        * @param Title $title
+        * @return SearchSuggestion
+        */
+       public static function fromTitle( $score, Title $title ) {
+               return new self( $score, $title->getPrefixedText(), $title, 
$title->getArticleID() );
+       }
+
+       /**
+        * Create suggestion from text
+        * Will also create a title if text if not empty.
+        * @param float $score Suggestions score
+        * @param string text
+        * @return SearchSuggestion
+        */
+       public static function fromText( $score, $text ) {
+               $suggestion = new self( $score, $text );
+               if ( $text ) {
+                       $suggestion->setSuggestedTitle( Title::makeTitle( 0, 
$text ) );
+               }
+               return $suggestion;
+       }
+
 }
diff --git a/includes/Search/SearchSuggestionSet.php 
b/includes/Search/SearchSuggestionSet.php
index b8376a1..fbf3479 100644
--- a/includes/Search/SearchSuggestionSet.php
+++ b/includes/Search/SearchSuggestionSet.php
@@ -23,18 +23,26 @@
  */
 
 /**
- * A set of SearchSuggestions
+ * A set of search suggestions.
+ * The set is always ordered by score, with the best match first.
  */
 class SearchSuggestionSet {
        /**
         * @var SearchSuggestion[]
         */
-       private $suggestions;
+       private $suggestions = array();
+
+       /**
+        *
+        * @var array
+        */
+       private $pageMap = array();
 
        /**
         * Builds a new set of suggestions.
         *
         * NOTE: the array should be sorted by score (higher is better),
+        * in descending order.
         * SearchSuggestionSet will not try to re-order this input array.
         * Providing an unsorted input array is a mistake and will lead to
         * unexpected behaviors.
@@ -42,7 +50,13 @@
         * @param SearchSuggestion[] $suggestions (must be sorted by score)
         */
        public function __construct( array $suggestions ) {
-               $this->suggestions = array_values( $suggestions );
+               foreach ( $suggestions as $suggestion ) {
+                       $pageID = $suggestion->getSuggestedTitleID();
+                       if ( $pageID && empty( $this->pageMap[$pageID] ) ) {
+                               $this->pageMap[$pageID] = true;
+                       }
+                       $this->suggestions[] = $suggestion;
+               }
        }
 
        public function getSuggestions() {
@@ -65,11 +79,18 @@
         *
         * @param SearchSuggestion $suggestion
         */
-       public function addSuggestion( SearchSuggestion $suggestion ) {
+       public function append( SearchSuggestion $suggestion ) {
+               $pageID = $suggestion->getSuggestedTitleID();
+               if ( $pageID && isset( $this->pageMap[$pageID] ) ) {
+                       return;
+               }
                if ( $this->getSize() > 0 && $suggestion->getScore() >= 
$this->getWorstScore() ) {
                        $suggestion->setScore( $this->getWorstScore() - 1);
                }
                $this->suggestions[] = $suggestion;
+               if ( $pageID ) {
+                       $this->pageMap[$pageID] = true;
+               }
        }
 
        /**
@@ -77,7 +98,8 @@
         */
        public function rescore( $key ) {
                $removed = array_splice( $this->suggestions, $key, 1 );
-               $this->insertBestSuggestion( $removed[0] );
+               unset( $this->pageMap[$removed[0]->getSuggestedTitleID()] );
+               $this->prepend( $removed[0] );
        }
 
        /**
@@ -85,11 +107,18 @@
         * is lower than the best one its score will be updated (best + 1)
         * @param SearchSuggestion $suggestion
         */
-       public function insertBestSuggestion( SearchSuggestion $suggestion ) {
+       public function prepend( SearchSuggestion $suggestion ) {
+               $pageID = $suggestion->getSuggestedTitleID();
+               if ( $pageID && isset( $this->pageMap[$pageID] ) ) {
+                       return;
+               }
                if( $this->getSize() > 0 && $suggestion->getScore() <= 
$this->getBestScore() ) {
                        $suggestion->setScore( $this->getBestScore() + 1 );
                }
-               array_unshift( $this->suggestions, $suggestion );
+               array_unshift( $this->suggestions,  $suggestion );
+               if ( $pageID ) {
+                       $this->pageMap[$pageID] = true;
+               }
        }
 
        /**
@@ -99,7 +128,7 @@
                if ( empty( $this->suggestions ) ) {
                        return 0;
                }
-               return reset( $this->suggestions )->getScore();
+               return $this->suggestions[0]->getScore();
        }
 
        /**
@@ -139,11 +168,10 @@
         * @return SearchSuggestionSet
         */
        public static function fromTitles( array $titles ) {
-               $suggestions = array();
                $score = count( $titles );
-               foreach( $titles as $title ) {
-                       $suggestions[] = new SearchSuggestion( 
$title->getPrefixedText(), wfExpandUrl( $title->getFullURL(), PROTO_CURRENT ), 
$score--, $title );
-               }
+               $suggestions = array_map( function( $title ) use ( &$score ) {
+                       return SearchSuggestion::fromTitle( $score--, $title );
+               }, $titles );
                return new SearchSuggestionSet( $suggestions );
        }
 
diff --git a/includes/UserTesting.php b/includes/UserTesting.php
index f8d8709..b9671b5 100644
--- a/includes/UserTesting.php
+++ b/includes/UserTesting.php
@@ -77,6 +77,13 @@
        }
 
        /**
+        * Unit test only
+        */
+       public static function resetInstance() {
+               self::$instance = null;
+       }
+
+       /**
         * @var array $config
         * @var callable|null $callback
         */
diff --git a/scripts/gen-autoload.php b/scripts/gen-autoload.php
index 62d4fe6..dd5476c 100644
--- a/scripts/gen-autoload.php
+++ b/scripts/gen-autoload.php
@@ -11,6 +11,7 @@
        foreach ( glob( $base . '/*.php' ) as $file ) {
                $generator->readFile( $file );
        }
+       $generator->readFile( __DIR__ . 'tests/phpunit/TestUtils.php' );
 
        $generator->generateAutoload( basename( __DIR__ ) . '/' . basename( 
__FILE__ ) );
 
diff --git a/tests/browser/features/suggest_api.feature 
b/tests/browser/features/suggest_api.feature
index 4e87eb8..52e74ef 100644
--- a/tests/browser/features/suggest_api.feature
+++ b/tests/browser/features/suggest_api.feature
@@ -41,7 +41,7 @@
     | max         | Max Eisenhardt    |
     | magnetu     | Magneto           |
 
-  Scenario Outline: Search prefers exact match over fuzzy match
+  Scenario Outline: Search prefers exact match over fuzzy match and ascii 
folded
     When I ask suggestion API for <term>
       Then the API should produce list starting with <suggested>
   Examples:
@@ -49,6 +49,8 @@
     | max         | Max Eisenhardt    |
     | mai         | Main Page         |
     | eis         | Eisenhardt, Max   |
+    | ele         | Elektra           |
+    | éle         | Électricité       |
 
   Scenario Outline: Search prefers exact db match over partial prefix match
     When I ask suggestion API at most 2 items for <term>
diff --git a/tests/browser/features/support/hooks.rb 
b/tests/browser/features/support/hooks.rb
index 91be10b..24c2265 100644
--- a/tests/browser/features/support/hooks.rb
+++ b/tests/browser/features/support/hooks.rb
@@ -644,6 +644,8 @@
         And a page named Ice Man (Marvel Comics) exists with contents 
#REDIRECT [[Iceman]]
         And a page named Ice-Man (comics books) exists with contents #REDIRECT 
[[Iceman]]
         And a page named Ultimate Iceman exists with contents #REDIRECT 
[[Iceman]]
+        And a page named Électricité exists with contents This is electicity 
in french.
+        And a page named Elektra exists with contents Elektra is a fictional 
character appearing in American comic books published by Marvel Comics.
         And I reindex suggestions
     )
     suggest = true
diff --git a/tests/unit/CompletionSuggesterTest.php 
b/tests/unit/CompletionSuggesterTest.php
new file mode 100644
index 0000000..1cd09c7
--- /dev/null
+++ b/tests/unit/CompletionSuggesterTest.php
@@ -0,0 +1,221 @@
+<?php
+
+namespace CirrusSearch;
+
+use \CirrusSearch\Test\HashSearchConfig;
+use \CirrusSearch\Test\DummyConnection;
+use \CirrusSearch\BuildDocument\SuggestBuilder;
+
+/**
+ * Completion Suggester Tests
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+class CompletionSuggesterTest extends \PHPUnit_Framework_TestCase {
+
+       /**
+        * @dataProvider provideQueries
+        */
+       public function testQueries( $config, $limit, $search, $variants, 
$expectedProfiles, $expectedQueries ) {
+               $completion = new MyCompletionSuggester( new HashSearchConfig( 
$config ), $limit );
+               list( $profiles, $suggest ) = $completion->testBuildQuery( 
$search, $variants );
+               $this->assertEquals( $expectedProfiles, $profiles );
+               $this->assertEquals( $expectedQueries, $suggest );
+       }
+
+
+       public function provideQueries() {
+               $simpleProfile = array(
+                       'plain' => array(
+                               'field' => 'suggest',
+                               'min_query_len' => 0,
+                               'discount' => 1.0,
+                               'fetch_limit_factor' => 2,
+                       ),
+               );
+
+               $simpleFuzzy = $simpleProfile + array(
+                       'plain-fuzzy' => array(
+                               'field' => 'suggest',
+                               'min_query_len' => 0,
+                               'fuzzy' => array(
+                                       'fuzzyness' => 'AUTO',
+                                       'prefix_length' => 1,
+                                       'unicode_aware' => true,
+                               ),
+                               'discount' => 0.5,
+                               'fetch_limit_factor' => 1.5
+                       )
+               );
+
+               return array(
+                       "simple" => array(
+                               array( 'CirrusSearchCompletionSettings' => 
$simpleProfile ),
+                               10,
+                               ' complete me ',
+                               null,
+                               $simpleProfile, // The profile remains 
unmodified here
+                               array(
+                                       'plain' => array(
+                                               'text' => 'complete me ', // 
keep trailing white spaces
+                                               'completion' => array(
+                                                       'field' => 'suggest',
+                                                       'size' => 20, // effect 
of fetch_limit_factor
+                                               ),
+                                       ),
+                               ),
+                       ),
+                       "simple with fuzzy" => array(
+                               array( 'CirrusSearchCompletionSettings' => 
$simpleFuzzy ),
+                               10,
+                               ' complete me ',
+                               null,
+                               $simpleFuzzy, // The profiles remains 
unmodified here
+                               array(
+                                       'plain' => array(
+                                               'text' => 'complete me ', // 
keep trailing white spaces
+                                               'completion' => array(
+                                                       'field' => 'suggest',
+                                                       'size' => 20, // effect 
of fetch_limit_factor
+                                               ),
+                                       ),
+                                       'plain-fuzzy' => array(
+                                               'text' => 'complete me ', // 
keep trailing white spaces
+                                               'completion' => array(
+                                                       'field' => 'suggest',
+                                                       'size' => 15.0, // 
effect of fetch_limit_factor
+                                                       // fuzzy config is 
simply copied from the profile
+                                                       'fuzzy' => array(
+                                                               'fuzzyness' => 
'AUTO',
+                                                               'prefix_length' 
=> 1,
+                                                               'unicode_aware' 
=> true,
+                                                       ),
+                                               ),
+                                       ),
+                               ),
+                       ),
+                       "simple with variants" => array(
+                               array( 'CirrusSearchCompletionSettings' => 
$simpleProfile ),
+                               10,
+                               ' complete me ',
+                               array( ' variant1 ', ' complete me ', ' 
variant2 ' ),
+                               // Profile is updated with extra variant setup
+                               // to include an extra discount
+                               // ' complete me ' variant duplicate will be 
ignored
+                               $simpleProfile + array(
+                                       'plain-variant-1' => array(
+                                               'field' => 'suggest',
+                                               'min_query_len' => 0,
+                                               'discount' => 1.0 * 
CompletionSuggester::VARIANT_EXTRA_DISCOUNT,
+                                               'fetch_limit_factor' => 2,
+                                               'fallback' => true, // extra 
key added, not used for now
+                                       ),
+                                       'plain-variant-2' => array(
+                                               'field' => 'suggest',
+                                               'min_query_len' => 0,
+                                               'discount' => 1.0 * 
(CompletionSuggester::VARIANT_EXTRA_DISCOUNT/2),
+                                               'fetch_limit_factor' => 2,
+                                               'fallback' => true, // extra 
key added, not used for now
+                                       )
+                               ),
+                               array(
+                                       'plain' => array(
+                                               'text' => 'complete me ', // 
keep trailing white spaces
+                                               'completion' => array(
+                                                       'field' => 'suggest',
+                                                       'size' => 20, // effect 
of fetch_limit_factor
+                                               ),
+                                       ),
+                                       'plain-variant-1' => array(
+                                               'text' => 'variant1 ',
+                                               'completion' => array(
+                                                       'field' => 'suggest',
+                                                       'size' => 20, // effect 
of fetch_limit_factor
+                                               ),
+                                       ),
+                                       'plain-variant-2' => array(
+                                               'text' => 'variant2 ',
+                                               'completion' => array(
+                                                       'field' => 'suggest',
+                                                       'size' => 20, // effect 
of fetch_limit_factor
+                                               ),
+                                       ),
+                               ),
+                       ),
+               );
+       }
+
+       /**
+        * @dataProvider provideMinMaxQueries
+        */
+       public function testMinMaxDefaultProfile( $len, $query ) {
+               global $wgCirrusSearchCompletionProfiles;
+               $config = array( 'CirrusSearchCompletionSettings' => 
$wgCirrusSearchCompletionProfiles['default'] );
+               // Test that we generate at most 4 profiles
+               $completion = new MyCompletionSuggester( new HashSearchConfig( 
$config ), 1 );
+               list( $profiles, $suggest ) = $completion->testBuildQuery( 
$query, array() );
+               // Unused profiles are kept
+               $this->assertEquals( count( 
$wgCirrusSearchCompletionProfiles['default'] ), count( $profiles ) );
+               // Never run more than 4 suggest query (without variants)
+               $this->assertTrue( count( $suggest ) <= 4 );
+               // small queries
+               $this->assertTrue( count( $suggest ) >= 2 );
+
+               if ( $len < 3 ) {
+                       // We do not run fuzzy for small queries
+                       $this->assertEquals( 2, count( $suggest ) );
+                       foreach( $suggest as $key => $value ) {
+                               $this->assertArrayNotHasKey( 'fuzzy', $value );
+                       }
+               }
+               foreach( $suggest as $key => $value ) {
+                       // Make sure the query is truncated otherwise elastic 
won't send results
+                       $this->assertTrue( mb_strlen( $value['text'] ) < 
SuggestBuilder::MAX_INPUT_LENGTH );
+               }
+               foreach( array_keys( $suggest ) as $sug ) {
+                       // Makes sure we have the corresponding profile
+                       $this->assertArrayHasKey( $sug, $profiles );
+               }
+       }
+
+       public function provideMinMaxQueries() {
+               $queries = array();
+               // The completion should not count extra spaces
+               // This is to avoid enbling costly fuzzy profiles
+               // by cheating with spaces
+               $query = '  ';
+               for( $i = 0; $i < 100; $i++ ) {
+                       $test = "Query length {$i}";
+                       $queries[$test] = array( $i, $query . '   ' );
+                       $query .= '';
+               }
+               return $queries;
+       }
+}
+
+/**
+ * No package visibility in with PHP so we have to subclass...
+ */
+class MyCompletionSuggester extends CompletionSuggester {
+       public function __construct( SearchConfig $config, $limit ) {
+               parent::__construct( new DummyConnection(), $limit, $config, 
array( NS_MAIN ), null, "dummy" );
+       }
+
+       public function testBuildQuery( $search, $variants ) {
+               $this->setTermAndVariants( $search, $variants );
+               return $this->buildQuery();
+       }
+}
diff --git a/tests/unit/RescoreBuilderTest.php 
b/tests/unit/RescoreBuilderTest.php
index a57bf00..4b349f2 100644
--- a/tests/unit/RescoreBuilderTest.php
+++ b/tests/unit/RescoreBuilderTest.php
@@ -2,6 +2,8 @@
 
 namespace CirrusSearch\Search;
 
+use CirrusSearch\Test\HashSearchConfig;
+
 class RescoreBuilderTest extends \PHPUnit_Framework_TestCase {
        public function testFunctionScoreDecorator() {
                $func = new FunctionScoreDecorator();
@@ -501,11 +503,5 @@
                                ),
                        ),
                );
-       }
-}
-
-class HashSearchConfig extends \CirrusSearch\SearchConfig {
-       public function __construct( array $settings ) {
-               $this->setSource( new \HashConfig( $settings ) );
        }
 }
diff --git a/tests/unit/Search/SearchSuggestionSetTest.php 
b/tests/unit/Search/SearchSuggestionSetTest.php
index 54062da..8b502a4 100644
--- a/tests/unit/Search/SearchSuggestionSetTest.php
+++ b/tests/unit/Search/SearchSuggestionSetTest.php
@@ -29,18 +29,18 @@
        public function testAppend() {
                $set = SearchSuggestionSet::emptySuggestionSet();
                $this->assertEquals( 0, $set->getSize() );
-               $set->addSuggestion( new SearchSuggestion( null, null, 3 ) );
+               $set->append( new SearchSuggestion( 3 ) );
                $this->assertEquals( 3, $set->getWorstScore() );
                $this->assertEquals( 3, $set->getBestScore() );
 
-               $suggestion = new SearchSuggestion( null, null, 4 );
-               $set->addSuggestion( $suggestion );
+               $suggestion = new SearchSuggestion( 4 );
+               $set->append( $suggestion );
                $this->assertEquals( 2, $set->getWorstScore() );
                $this->assertEquals( 3, $set->getBestScore() );
                $this->assertEquals( 2, $suggestion->getScore() );
 
-               $suggestion = new SearchSuggestion( null, null, 2 );
-               $set->addSuggestion( $suggestion );
+               $suggestion = new SearchSuggestion( 2 );
+               $set->append( $suggestion );
                $this->assertEquals( 1, $set->getWorstScore() );
                $this->assertEquals( 3, $set->getBestScore() );
                $this->assertEquals( 1, $suggestion->getScore() );
@@ -58,24 +58,24 @@
        public function testInsertBest() {
                $set = SearchSuggestionSet::emptySuggestionSet();
                $this->assertEquals( 0, $set->getSize() );
-               $set->insertBestSuggestion( new SearchSuggestion( null, null, 3 
) );
+               $set->prepend( new SearchSuggestion( 3 ) );
                $this->assertEquals( 3, $set->getWorstScore() );
                $this->assertEquals( 3, $set->getBestScore() );
 
-               $suggestion = new SearchSuggestion( null, null, 4 );
-               $set->insertBestSuggestion( $suggestion );
+               $suggestion = new SearchSuggestion( 4 );
+               $set->prepend( $suggestion );
                $this->assertEquals( 3, $set->getWorstScore() );
                $this->assertEquals( 4, $set->getBestScore() );
                $this->assertEquals( 4, $suggestion->getScore() );
 
-               $suggestion = new SearchSuggestion( null, null, null );
-               $set->insertBestSuggestion( $suggestion );
+               $suggestion = new SearchSuggestion( 0 );
+               $set->prepend( $suggestion );
                $this->assertEquals( 3, $set->getWorstScore() );
                $this->assertEquals( 5, $set->getBestScore() );
                $this->assertEquals( 5, $suggestion->getScore() );
 
-               $suggestion = new SearchSuggestion( null, null, 2 );
-               $set->insertBestSuggestion( $suggestion );
+               $suggestion = new SearchSuggestion( 2 );
+               $set->prepend( $suggestion );
                $this->assertEquals( 3, $set->getWorstScore() );
                $this->assertEquals( 6, $set->getBestScore() );
                $this->assertEquals( 6, $suggestion->getScore() );
@@ -89,7 +89,7 @@
        public function testShrink() {
                $set = SearchSuggestionSet::emptySuggestionSet();
                for( $i = 0; $i < 100; $i++) {
-                       $set->addSuggestion( new SearchSuggestion() );
+                       $set->append( new SearchSuggestion( 0 ) );
                }
                $set->shrink( 10 );
                $this->assertEquals( 10, $set->getSize() );
@@ -97,4 +97,6 @@
                $set->shrink( 0 );
                $this->assertEquals( 0, $set->getSize() );
        }
+
+       // TODO: test for fromTitles
 }
diff --git a/tests/unit/TestUtils.php b/tests/unit/TestUtils.php
new file mode 100644
index 0000000..4db92b4
--- /dev/null
+++ b/tests/unit/TestUtils.php
@@ -0,0 +1,13 @@
+<?php
+namespace CirrusSearch\Test;
+
+class HashSearchConfig extends \CirrusSearch\SearchConfig {
+       public function __construct( array $settings ) {
+               $this->setSource( new \HashConfig( $settings ) );
+       }
+}
+
+class DummyConnection extends \CirrusSearch\Connection {
+       public function __construct() {
+       }
+}
diff --git a/tests/unit/UserTestingTest.php b/tests/unit/UserTestingTest.php
index c8c9b7c..4133e7d 100644
--- a/tests/unit/UserTestingTest.php
+++ b/tests/unit/UserTestingTest.php
@@ -23,6 +23,14 @@
  * http://www.gnu.org/copyleft/gpl.html
  */
 class UserTestingTest extends \MediaWikiTestCase {
+       /**
+        * @beforeClass
+        */
+       public static function setUpBeforeClass() {
+               ElasticSearchIntermediary::resetExecutionId();
+               UserTesting::resetInstance();
+       }
+
        public function testPartitipcationInTest() {
                $config = $this->config( 'test' );
                $ut = $this->ut( $config, true );

-- 
To view, visit https://gerrit.wikimedia.org/r/258417
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iab87083841481107daf8048ddfabef3c8f284f79
Gerrit-PatchSet: 16
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Smalyshev <[email protected]>
Gerrit-Reviewer: Cindy-the-browser-test-bot <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
Gerrit-Reviewer: EBernhardson <[email protected]>
Gerrit-Reviewer: Manybubbles <[email protected]>
Gerrit-Reviewer: Smalyshev <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to