jenkins-bot has submitted this change and it was merged.
Change subject: Refactor the suggester.
......................................................................
Refactor the suggester.
* Refactored some fragile design/methods
* Moved the variant fallback queries into CompletionSuggester.php
* Added unit tests
Change-Id: Iab87083841481107daf8048ddfabef3c8f284f79
---
M autoload.php
M includes/Api/Suggest.php
M includes/CirrusSearch.php
M includes/CompletionSuggester.php
M includes/ElasticsearchIntermediary.php
M includes/Search/SearchSuggestion.php
M includes/Search/SearchSuggestionSet.php
M includes/UserTesting.php
M scripts/gen-autoload.php
M tests/browser/features/suggest_api.feature
M tests/browser/features/support/hooks.rb
A tests/unit/CompletionSuggesterTest.php
M tests/unit/RescoreBuilderTest.php
M tests/unit/Search/SearchSuggestionSetTest.php
A tests/unit/TestUtils.php
M tests/unit/UserTestingTest.php
16 files changed, 571 insertions(+), 168 deletions(-)
Approvals:
EBernhardson: Looks good to me, approved
jenkins-bot: Verified
diff --git a/autoload.php b/autoload.php
index ddfdd74..6d7c0b2 100644
--- a/autoload.php
+++ b/autoload.php
@@ -117,4 +117,6 @@
'CirrusSearch\\UserTesting' => __DIR__ . '/includes/UserTesting.php',
'CirrusSearch\\Util' => __DIR__ . '/includes/Util.php',
'CirrusSearch\\Version' => __DIR__ . '/includes/Version.php',
+ 'CirrusSearch\\Test\\HashSearchConfig' => __DIR__ .
'/tests/unit/TestUtils.php',
+ 'CirrusSearch\\Test\\DummyConnection' => __DIR__ .
'/tests/unit/TestUtils.php',
);
diff --git a/includes/Api/Suggest.php b/includes/Api/Suggest.php
index a455a3d..944be7c 100644
--- a/includes/Api/Suggest.php
+++ b/includes/Api/Suggest.php
@@ -40,7 +40,6 @@
}
$suggestions = $cirrus->searchSuggestions( $queryText );
-
// Use the same cache options used by OpenSearch
$this->getMain()->setCacheMaxAge( $this->getConfig()->get(
'SearchSuggestCacheExpiry' ) );
$this->getMain()->setCacheMode( 'public' );
diff --git a/includes/CirrusSearch.php b/includes/CirrusSearch.php
index c0a0bd3..7927209 100644
--- a/includes/CirrusSearch.php
+++ b/includes/CirrusSearch.php
@@ -390,50 +390,22 @@
$suggester = new CompletionSuggester( $this->connection,
$this->limit,
$config, $this->namespaces, $user, $this->indexBaseName
);
- $response = $suggester->suggest( $search );
- $suggestions = SearchSuggestionSet::emptySuggestionSet();
+ // Not really useful, mostly for testing purpose
+ $variants = $request->getArray(
'cirrusCompletionSuggesterVariant' );
+ if ( empty( $variants ) ) {
+ global $wgContLang;
+ $variants = $wgContLang->autoConvertToAllVariants(
$search );
+ } else if ( count( $variants ) > 3 ) {
+ // We should not allow too many variants
+ $variants = array_slice( $variants, 0, 3 );
+ }
+
+ $response = $suggester->suggest( $search, $variants );
if ( $response->isOK() ) {
// Errors will be logged, let's try the exact db match
$suggestions = $response->getValue();
- }
-
- // if the content language has variants, try to retrieve
fallback results
- $fallbackLimit = $this->limit - $suggestions->getSize();;
-
- // Copied from PrefixSearch
- // @todo: verify if this is really needed, if variants are
- // close enough fuzzy suggestions could already cover this
- // usecase.
- if ( $fallbackLimit > 0 ) {
- global $wgContLang;
-
- $fallbackSearches =
$wgContLang->autoConvertToAllVariants( $search );
- $fallbackSearches = array_diff( array_unique(
$fallbackSearches ), array( $search ) );
-
- $suggester->setLimit( $fallbackLimit );
- foreach ( $fallbackSearches as $fbs ) {
- $fallbackResponse = $suggester->suggest( $fbs );
- if ( !$fallbackResponse->isOK() ) {
- continue;
- }
- $pageIds = $suggestions->map( function( $sugg )
{
- return $sugg->getSuggestedTitleID();
- });
-
- $fallbackSuggestions =
$fallbackResponse->getValue();
- // Same page can be returned (fuzzy suggestions)
- foreach( $fallbackSuggestions->getSuggestions()
as $s ) {
- if ( !in_array (
$s->getSuggestedTitleID(), $pageIds ) ) {
- $suggestions->addSuggestion( $s
);
- }
- }
-
- $fallbackLimit = $this->limit -
$suggestions->getSize();
-
- if ( $fallbackLimit <= 0 ) {
- break;
- }
- }
+ } else {
+ $suggestions =
SearchSuggestionSet::emptySuggestionSet();
}
// preload the titles with LinkBatch
@@ -454,21 +426,17 @@
$rescoredResults = $rescorer->rescore( $search,
$this->namespaces, $results, $this->limit );
if( count( $rescoredResults ) > 0 ) {
- if ( !in_array( reset( $rescoredResults ), $results ) )
{
+ $found = array_search( $rescoredResults[0], $results );
+ if ( $found === false ) {
// If the first result is not in the previous
array it
// means that we found a new exact match
- $exactTitle = Title::newFromText( reset(
$rescoredResults ) );
- $exactMatch = new SearchSuggestion();
- $exactMatch->setText(
$exactTitle->getPrefixedText() );
- $exactMatch->setSuggestedTitle( $exactTitle,
true );
- $exactMatch->setScore( 0 );
- $suggestions->insertBestSuggestion( $exactMatch
);
+ $exactMatch = SearchSuggestion::fromTitle( 0,
Title::newFromText( $rescoredResults[0] ) );
+ $suggestions->prepend( $exactMatch );
$suggestions->shrink( $this->limit );
} else {
// if the first result is not the same we need
to rescore
- if( reset( $rescoredResults ) != reset(
$results ) ) {
- $rescoredIndex = array_search( reset(
$rescoredResults ), $results );
- $suggestions->rescore( $rescoredIndex );
+ if( $found > 0 ) {
+ $suggestions->rescore( $found );
}
}
}
diff --git a/includes/CompletionSuggester.php b/includes/CompletionSuggester.php
index e0edd4d..1df1d2e 100644
--- a/includes/CompletionSuggester.php
+++ b/includes/CompletionSuggester.php
@@ -13,7 +13,6 @@
use Title;
use User;
use Elastica\Request;
-use Elastica\Exception\ResponseException;
/**
* Performs search as you type queries using Completion Suggester.
@@ -82,10 +81,23 @@
* some workarounds (https://github.com/elastic/elasticsearch/issues/10746).
*/
class CompletionSuggester extends ElasticsearchIntermediary {
+ const VARIANT_EXTRA_DISCOUNT = 0.0001;
/**
* @var string term to search.
*/
private $term;
+
+ /**
+ * @var string[]|null search variants
+ */
+ private $variants;
+
+ /**
+ * Currently very limited (see LIMITATIONS) and only works
+ * for geo context
+ * @var array|null context for contextualized suggestions
+ */
+ private $context;
/**
* @var integer maximum number of result
@@ -103,6 +115,11 @@
* Specified as public because of closures. When we move to non-anicent
PHP version, can be made protected.
*/
public $config;
+
+ /**
+ * @var string Query type (comp_suggest_geo or comp_suggest)
+ */
+ public $queryType;
/**
* @var SearchContext
@@ -140,52 +157,16 @@
* WARNING: experimental API
*
* @param string $text Search term
+ * @param string[]|null $variants Search term variants
+ * (usually issued from $wgContLang->autoConvertToAllVariants( $text ) )
* @param array $context
* @return Status
*/
- public function suggest( $text, $context = null ) {
- // Do not remove spaces at the end, the user might tell us he
finished writing a word
- $this->term = ltrim( $text );
+ public function suggest( $text, $variants = null, $context = null ) {
+ $this->setTermAndVariants( $text, $variants );
+ $this->context = $context;
- if ( mb_strlen( $this->term ) >
SuggestBuilder::MAX_INPUT_LENGTH ) {
- // Trim the query otherwise we won't find results
- $this->term = mb_substr( $this->term, 0,
SuggestBuilder::MAX_INPUT_LENGTH );
- }
-
- $suggest = array( 'text' => $text );
- $queryLen = mb_strlen( trim( $text ) ); // Avoid cheating with
spaces
- $queryType = "comp_suggest";
-
- $profiles = $this->config->get(
'CirrusSearchCompletionSettings' );
- if ( $context != null && isset( $context['geo']['lat'] ) &&
isset( $context['geo']['lon'] )
- && is_numeric( $context['geo']['lat'] ) && is_numeric(
$context['geo']['lon'] )
- ) {
- $profiles = $this->prepareGeoContextSuggestProfiles(
$context );
- $queryType = "comp_suggest_geo";
- }
-
- foreach ( $profiles as $name => $config ) {
- if ( $config['min_query_len'] > $queryLen ) {
- continue;
- }
- if ( isset( $config['max_query_len'] ) && $queryLen >
$config['max_query_len'] ) {
- continue;
- }
- $field = $config['field'];
- $suggest[$name] = array(
- 'completion' => array(
- 'field' => $field,
- 'size' => $this->limit *
$config['fetch_limit_factor']
- )
- );
- if ( isset( $config['fuzzy'] ) ) {
- $suggest[$name]['completion']['fuzzy'] =
$config['fuzzy'];
- }
- if ( isset( $config['context'] ) ) {
- $suggest[$name]['completion']['context'] =
$config['context'];
- }
- }
-
+ list( $profiles, $suggest ) = $this->buildQuery();
$queryOptions = array();
$queryOptions[ 'timeout' ] = $this->config->getElement(
'CirrusSearchSearchShardTimeout', 'default' );
$this->connection->setTimeout( $queryOptions[ 'timeout' ] );
@@ -193,7 +174,7 @@
$index = $this->connection->getIndex( $this->indexBaseName,
Connection::TITLE_SUGGEST_TYPE );
$logContext = array(
'query' => $text,
- 'queryType' => $queryType,
+ 'queryType' => $this->queryType,
);
$searcher = $this;
$limit = $this->limit;
@@ -207,7 +188,7 @@
try {
$result = $index->request( "_suggest",
Request::POST, $suggest, $queryOptions );
if( $result->isOk() ) {
- $result =
$searcher->postProcessSuggest( $text, $result,
+ $result =
$searcher->postProcessSuggest( $result,
$profiles, $limit );
return $searcher->success(
$result );
}
@@ -221,13 +202,157 @@
}
/**
+ * protected for tests
+ */
+ protected function setTermAndVariants( $term, $variants = null ) {
+ $this->term = $term;
+ if ( empty( $variants ) ) {
+ $this->variants = null;
+ return;
+ }
+ $variants = array_diff( array_unique( $variants ), array( $term
) );
+ if ( empty( $variants ) ) {
+ $this->variants = null;
+ } else {
+ $this->variants = $variants;
+ }
+ }
+
+ /**
+ * Builds the suggest queries and profiles.
+ * Use with list( $profiles, $suggest ).
+ * @return array the profiles and suggest queries
+ */
+ protected function buildQuery() {
+ if ( mb_strlen( $this->term ) >
SuggestBuilder::MAX_INPUT_LENGTH ) {
+ // Trim the query otherwise we won't find results
+ $this->term = mb_substr( $this->term, 0,
SuggestBuilder::MAX_INPUT_LENGTH );
+ }
+
+ $queryLen = mb_strlen( trim( $this->term ) ); // Avoid cheating
with spaces
+ $this->queryType = "comp_suggest";
+
+ $profiles = $this->config->get(
'CirrusSearchCompletionSettings' );
+ if ( $this->context != null && isset(
$this->context['geo']['lat'] )
+ && isset( $this->context['geo']['lon'] ) && is_numeric(
$this->context['geo']['lat'] )
+ && is_numeric( $this->context['geo']['lon'] )
+ ) {
+ $profiles = $this->prepareGeoContextSuggestProfiles();
+ $this->queryType = "comp_suggest_geo";
+ }
+
+ $suggest = $this->buildSuggestQueries( $profiles, $this->term,
$queryLen );
+
+ // Handle variants, update the set of profiles and suggest
queries
+ if ( !empty( $this->variants ) ) {
+ list( $addProfiles, $addSuggest ) =
$this->handleVariants( $profiles, $queryLen );
+ $profiles += $addProfiles;
+ $suggest += $addSuggest;
+ }
+ return array( $profiles, $suggest );
+ }
+
+ /**
+ * Builds a set of suggest query by reading the list of profiles
+ * @param array $profiles
+ * @param string $query
+ * @param int $queryLen the length to use when checking
min/max_query_len
+ * @return array a set of suggest queries ready to for elastic
+ */
+ protected function buildSuggestQueries( array $profiles, $query,
$queryLen ) {
+ $suggest = array();
+ foreach($profiles as $name => $config) {
+ $sugg = $this->buildSuggestQuery( $config, $query,
$queryLen );
+ if(!$sugg) {
+ continue;
+ }
+ $suggest[$name] = $sugg;
+ }
+ return $suggest;
+ }
+
+ /**
+ * Builds a suggest query from a profile
+ * @param array $config Profile
+ * @param string $query
+ * @param int $queryLen the length to use when checking
min/max_query_len
+ * @return array|null suggest query ready to for elastic or null
+ */
+ protected function buildSuggestQuery( array $config, $query, $queryLen
) {
+ // Do not remove spaces at the end, the user might tell us he
finished writing a word
+ $query = ltrim( $query );
+ if ( $config['min_query_len'] > $queryLen ) {
+ return null;
+ }
+ if ( isset( $config['max_query_len'] ) && $queryLen >
$config['max_query_len'] ) {
+ return null;
+ }
+ $field = $config['field'];
+ $suggest = array(
+ 'text' => $query,
+ 'completion' => array(
+ 'field' => $field,
+ 'size' => $this->limit *
$config['fetch_limit_factor']
+ )
+ );
+ if ( isset( $config['fuzzy'] ) ) {
+ $suggest['completion']['fuzzy'] = $config['fuzzy'];
+ }
+ if ( isset( $config['context'] ) ) {
+ $suggest['completion']['context'] = $config['context'];
+ }
+ return $suggest;
+ }
+
+ /**
+ * Update the suggest queries and return additional profiles flagged
the 'fallback' key
+ * with a discount factor = originalDiscount * 0.0001/(variantIndex+1).
+ * @param array $profiles the default profiles
+ * @param array $suggest the current set of suggest queries
+ * @param string[]|null $variants the list of variant queries
+ * @param string $search the original query
+ * @param int $queryLen the original query length
+ * @return array new variant profiles
+ */
+ protected function handleVariants( array $profiles, $queryLen ) {
+ $variantIndex = 0;
+ $allVariantProfiles = array();
+ $allSuggestions = array();
+ foreach( $this->variants as $variant ) {
+ $variantIndex++;
+ foreach ( $profiles as $name => $profile ) {
+ $variantProfName = $name . '-variant-' .
$variantIndex;
+ $allVariantProfiles[$variantProfName] =
$this->buildVariantProfile( $profile,
self::VARIANT_EXTRA_DISCOUNT/$variantIndex );
+ $allSuggestions[$variantProfName] =
$this->buildSuggestQuery(
+
$allVariantProfiles[$variantProfName], $variant, $queryLen
+ );
+ }
+ }
+ return array( $allVariantProfiles, $allSuggestions );
+ }
+
+ /**
+ * Creates a copy of $profile[$name] with a custom '-variant-SEQ'
suffix.
+ * And applies an extra discount factor of 0.0001.
+ * The copy is added to the profiles container.
+ * @param array $profile profile to copy
+ * @param float $extraDiscount extra discount factor to rank variant
suggestion lower.
+ */
+ protected function buildVariantProfile( array $profile, $extraDiscount
= 0.0001 ) {
+ // mark the profile as a fallback query
+ $profile['fallback'] = true;
+ $profile['discount'] *= $extraDiscount;
+ return $profile;
+ }
+
+ /**
* prepare the list of suggest requests used for geo context suggestions
* This method will merge $this->config->get(
'CirrusSearchCompletionSettings and
* $this->config->get( 'CirrusSearchCompletionGeoContextSettings
* @param array $context user's geo context
* @return array of suggest request profiles
*/
- private function prepareGeoContextSuggestProfiles( $context ) {
+ private function prepareGeoContextSuggestProfiles() {
$profiles = array();
foreach ( $this->config->get(
'CirrusSearchCompletionGeoContextSettings' ) as $geoname => $geoprof ) {
foreach ( $this->config->get(
'CirrusSearchCompletionSettings' ) as $sugname => $sugprof ) {
@@ -239,8 +364,8 @@
$profile['discount'] *= $geoprof['discount'];
$profile['context'] = array(
'location' => array(
- 'lat' => $context['geo']['lat'],
- 'lon' => $context['geo']['lon'],
+ 'lat' =>
$this->context['geo']['lat'],
+ 'lon' =>
$this->context['geo']['lon'],
'precision' =>
$geoprof['precision']
)
);
@@ -261,7 +386,7 @@
* @param int $limit Maximum suggestions to return, -1 for unlimited
* @return SearchSuggestionSet a set of Suggestions
*/
- protected function postProcessSuggest( $query, \Elastica\Response
$response, $profiles, $limit = -1 ) {
+ protected function postProcessSuggest( \Elastica\Response $response,
$profiles, $limit = -1 ) {
$this->logContext['elasticTookMs'] = intval(
$response->getQueryTime() * 1000 );
$data = $response->getData();
unset( $data['_shards'] );
@@ -283,7 +408,7 @@
if ( !isset( $suggestions[$pageId] ) ||
$score >
$suggestions[$pageId]->getScore()
) {
- $suggestion = new
SearchSuggestion( null, null, $score, null, $pageId );
+ $suggestion = new
SearchSuggestion( $score, null, null, $pageId );
// If it's a title suggestion
we have the text
if ( $type ===
SuggestBuilder::TITLE_SUGGESTION ) {
$suggestion->setText(
$output['text'] );
@@ -337,47 +462,39 @@
if ( $redirResponse->isOk() ) {
$this->logContext['elasticTook2PassMs']
= intval( $redirResponse->getQueryTime() * 1000 );
$docs = $redirResponse->getData();
- $docs = $docs['docs'];
- foreach ( $docs as $doc ) {
- $id = $doc['_id'];
- if ( !isset(
$doc['_source']['redirect'] )
- || empty(
$doc['_source']['redirect'] )
- ) {
+ foreach ( $docs['docs'] as $doc ) {
+ if ( empty(
$doc['_source']['redirect'] ) ) {
continue;
}
- $text =
Util::chooseBestRedirect( $query, $doc['_source']['redirect'] );
- $suggestions[$id]->setText(
$text );
+ // We use the original query,
we should maybe use the variant that generated this result?
+ $text =
Util::chooseBestRedirect( $this->term, $doc['_source']['redirect'] );
+ if( !empty(
$suggestions[$doc['_id']] ) ) {
+
$suggestions[$doc['_id']]->setText( $text );
+ }
}
} else {
LoggerFactory::getInstance(
'CirrusSearch' )->warning(
'Unable to fetch redirects for
suggestion {query} with results {ids} : {error}',
- array( 'query' => $query,
+ array( 'query' => $this->term,
'ids' => serialize(
$missingText ),
'error' =>
$redirResponse->getError() ) );
}
} catch ( \Elastica\Exception\ExceptionInterface $e ) {
LoggerFactory::getInstance( 'CirrusSearch'
)->warning(
'Unable to fetch redirects for
suggestion {query} with results {ids} : {error}',
- array( 'query' => $query,
+ array( 'query' => $this->term,
'ids' => serialize(
$missingText ),
'error' =>
$this->extractMessage( $e ) ) );
}
}
- $retval = array();
- foreach ( $suggestions as $suggestion ) {
- if ( $suggestion->getText() === null ) {
- // We were unable to find a text to display
- // Maybe a page with redirects when we built
the suggester index
- // but now without redirects?
- continue;
+ return new SearchSuggestionSet( array_filter(
+ $suggestions,
+ function ( $suggestion ) {
+ // text should be not empty for suggestions
+ return $suggestion->getText() != null;
}
- // Populate the SearchSuggestion object
- $suggestion->setSuggestedTitle( Title::makeTitle( 0,
$suggestion->getText() ), true );
- $retval[] = $suggestion;
- }
-
- return new SearchSuggestionSet( $retval );
+ ) );
}
/**
diff --git a/includes/ElasticsearchIntermediary.php
b/includes/ElasticsearchIntermediary.php
index 7738c82..12992e6 100644
--- a/includes/ElasticsearchIntermediary.php
+++ b/includes/ElasticsearchIntermediary.php
@@ -109,6 +109,13 @@
}
/**
+ * Unit tests only
+ */
+ public static function resetExecutionId() {
+ self::$executionId = null;
+ }
+
+ /**
* Summarizes all the requests made in this process and reports
* them along with the test they belong to.
* Only public due to php 5.3 not having access from closures
diff --git a/includes/Search/SearchSuggestion.php
b/includes/Search/SearchSuggestion.php
index ec46971..e39a982 100644
--- a/includes/Search/SearchSuggestion.php
+++ b/includes/Search/SearchSuggestion.php
@@ -58,17 +58,17 @@
/**
* Construct a new suggestion
+ * @param float $score the suggestion score
* @param string $text|null the suggestion text
- * @param string $url|null the suggestion URL
- * @param float|0 the suggestion score
* @param Title|null $suggestedTitle the suggested title
- * @param int|null the suggested title ID
+ * @param int|null $suggestedTitleID the suggested title ID
*/
- public function __construct( $text = null, $url = null, $score = 0,
Title $suggestedTitle = null, $suggestedTitleID = null ) {
- $this->text = $text;
- $this->url = $url;
+ public function __construct( $score, $text = null, Title
$suggestedTitle = null, $suggestedTitleID = null ) {
$this->score = $score;
- $this->suggestedTitle = $suggestedTitle;
+ $this->text = $text;
+ if ( $suggestedTitle ) {
+ $this->setSuggestedTitle( $suggestedTitle );
+ }
$this->suggestedTitleID = $suggestedTitleID;
}
@@ -81,11 +81,15 @@
}
/**
- * Set the suggestion text
+ * Set the suggestion text.
* @param string $text
+ * @param bool $setTitle Should we also update the title?
*/
- public function setText( $text ) {
+ public function setText( $text, $setTitle = true ) {
$this->text = $text;
+ if ( $setTitle && $text ) {
+ $this->setSuggestedTitle( Title::makeTitle( 0, $text )
);
+ }
}
/**
@@ -102,9 +106,9 @@
* @param Title|null $title
* @param boolean|false $generateURL set to true to generate the URL
based on this Title
*/
- public function setSuggestedTitle( Title $title = null, $generateURL =
false ) {
+ public function setSuggestedTitle( Title $title = null ) {
$this->suggestedTitle = $title;
- if ( $title !== null && $generateURL ) {
+ if ( $title !== null ) {
$this->url = wfExpandUrl( $title->getFullURL(),
PROTO_CURRENT );
}
}
@@ -158,4 +162,30 @@
public function setURL( $url ) {
$this->url = $url;
}
+
+ /**
+ * Create suggestion from Title
+ * @param float $score Suggestions score
+ * @param Title $title
+ * @return SearchSuggestion
+ */
+ public static function fromTitle( $score, Title $title ) {
+ return new self( $score, $title->getPrefixedText(), $title,
$title->getArticleID() );
+ }
+
+ /**
+ * Create suggestion from text
+ * Will also create a title if text if not empty.
+ * @param float $score Suggestions score
+ * @param string text
+ * @return SearchSuggestion
+ */
+ public static function fromText( $score, $text ) {
+ $suggestion = new self( $score, $text );
+ if ( $text ) {
+ $suggestion->setSuggestedTitle( Title::makeTitle( 0,
$text ) );
+ }
+ return $suggestion;
+ }
+
}
diff --git a/includes/Search/SearchSuggestionSet.php
b/includes/Search/SearchSuggestionSet.php
index b8376a1..fbf3479 100644
--- a/includes/Search/SearchSuggestionSet.php
+++ b/includes/Search/SearchSuggestionSet.php
@@ -23,18 +23,26 @@
*/
/**
- * A set of SearchSuggestions
+ * A set of search suggestions.
+ * The set is always ordered by score, with the best match first.
*/
class SearchSuggestionSet {
/**
* @var SearchSuggestion[]
*/
- private $suggestions;
+ private $suggestions = array();
+
+ /**
+ *
+ * @var array
+ */
+ private $pageMap = array();
/**
* Builds a new set of suggestions.
*
* NOTE: the array should be sorted by score (higher is better),
+ * in descending order.
* SearchSuggestionSet will not try to re-order this input array.
* Providing an unsorted input array is a mistake and will lead to
* unexpected behaviors.
@@ -42,7 +50,13 @@
* @param SearchSuggestion[] $suggestions (must be sorted by score)
*/
public function __construct( array $suggestions ) {
- $this->suggestions = array_values( $suggestions );
+ foreach ( $suggestions as $suggestion ) {
+ $pageID = $suggestion->getSuggestedTitleID();
+ if ( $pageID && empty( $this->pageMap[$pageID] ) ) {
+ $this->pageMap[$pageID] = true;
+ }
+ $this->suggestions[] = $suggestion;
+ }
}
public function getSuggestions() {
@@ -65,11 +79,18 @@
*
* @param SearchSuggestion $suggestion
*/
- public function addSuggestion( SearchSuggestion $suggestion ) {
+ public function append( SearchSuggestion $suggestion ) {
+ $pageID = $suggestion->getSuggestedTitleID();
+ if ( $pageID && isset( $this->pageMap[$pageID] ) ) {
+ return;
+ }
if ( $this->getSize() > 0 && $suggestion->getScore() >=
$this->getWorstScore() ) {
$suggestion->setScore( $this->getWorstScore() - 1);
}
$this->suggestions[] = $suggestion;
+ if ( $pageID ) {
+ $this->pageMap[$pageID] = true;
+ }
}
/**
@@ -77,7 +98,8 @@
*/
public function rescore( $key ) {
$removed = array_splice( $this->suggestions, $key, 1 );
- $this->insertBestSuggestion( $removed[0] );
+ unset( $this->pageMap[$removed[0]->getSuggestedTitleID()] );
+ $this->prepend( $removed[0] );
}
/**
@@ -85,11 +107,18 @@
* is lower than the best one its score will be updated (best + 1)
* @param SearchSuggestion $suggestion
*/
- public function insertBestSuggestion( SearchSuggestion $suggestion ) {
+ public function prepend( SearchSuggestion $suggestion ) {
+ $pageID = $suggestion->getSuggestedTitleID();
+ if ( $pageID && isset( $this->pageMap[$pageID] ) ) {
+ return;
+ }
if( $this->getSize() > 0 && $suggestion->getScore() <=
$this->getBestScore() ) {
$suggestion->setScore( $this->getBestScore() + 1 );
}
- array_unshift( $this->suggestions, $suggestion );
+ array_unshift( $this->suggestions, $suggestion );
+ if ( $pageID ) {
+ $this->pageMap[$pageID] = true;
+ }
}
/**
@@ -99,7 +128,7 @@
if ( empty( $this->suggestions ) ) {
return 0;
}
- return reset( $this->suggestions )->getScore();
+ return $this->suggestions[0]->getScore();
}
/**
@@ -139,11 +168,10 @@
* @return SearchSuggestionSet
*/
public static function fromTitles( array $titles ) {
- $suggestions = array();
$score = count( $titles );
- foreach( $titles as $title ) {
- $suggestions[] = new SearchSuggestion(
$title->getPrefixedText(), wfExpandUrl( $title->getFullURL(), PROTO_CURRENT ),
$score--, $title );
- }
+ $suggestions = array_map( function( $title ) use ( &$score ) {
+ return SearchSuggestion::fromTitle( $score--, $title );
+ }, $titles );
return new SearchSuggestionSet( $suggestions );
}
diff --git a/includes/UserTesting.php b/includes/UserTesting.php
index f8d8709..b9671b5 100644
--- a/includes/UserTesting.php
+++ b/includes/UserTesting.php
@@ -77,6 +77,13 @@
}
/**
+ * Unit test only
+ */
+ public static function resetInstance() {
+ self::$instance = null;
+ }
+
+ /**
* @var array $config
* @var callable|null $callback
*/
diff --git a/scripts/gen-autoload.php b/scripts/gen-autoload.php
index 62d4fe6..dd5476c 100644
--- a/scripts/gen-autoload.php
+++ b/scripts/gen-autoload.php
@@ -11,6 +11,7 @@
foreach ( glob( $base . '/*.php' ) as $file ) {
$generator->readFile( $file );
}
+ $generator->readFile( __DIR__ . 'tests/phpunit/TestUtils.php' );
$generator->generateAutoload( basename( __DIR__ ) . '/' . basename(
__FILE__ ) );
diff --git a/tests/browser/features/suggest_api.feature
b/tests/browser/features/suggest_api.feature
index 4e87eb8..52e74ef 100644
--- a/tests/browser/features/suggest_api.feature
+++ b/tests/browser/features/suggest_api.feature
@@ -41,7 +41,7 @@
| max | Max Eisenhardt |
| magnetu | Magneto |
- Scenario Outline: Search prefers exact match over fuzzy match
+ Scenario Outline: Search prefers exact match over fuzzy match and ascii
folded
When I ask suggestion API for <term>
Then the API should produce list starting with <suggested>
Examples:
@@ -49,6 +49,8 @@
| max | Max Eisenhardt |
| mai | Main Page |
| eis | Eisenhardt, Max |
+ | ele | Elektra |
+ | éle | Électricité |
Scenario Outline: Search prefers exact db match over partial prefix match
When I ask suggestion API at most 2 items for <term>
diff --git a/tests/browser/features/support/hooks.rb
b/tests/browser/features/support/hooks.rb
index 91be10b..24c2265 100644
--- a/tests/browser/features/support/hooks.rb
+++ b/tests/browser/features/support/hooks.rb
@@ -644,6 +644,8 @@
And a page named Ice Man (Marvel Comics) exists with contents
#REDIRECT [[Iceman]]
And a page named Ice-Man (comics books) exists with contents #REDIRECT
[[Iceman]]
And a page named Ultimate Iceman exists with contents #REDIRECT
[[Iceman]]
+ And a page named Électricité exists with contents This is electicity
in french.
+ And a page named Elektra exists with contents Elektra is a fictional
character appearing in American comic books published by Marvel Comics.
And I reindex suggestions
)
suggest = true
diff --git a/tests/unit/CompletionSuggesterTest.php
b/tests/unit/CompletionSuggesterTest.php
new file mode 100644
index 0000000..1cd09c7
--- /dev/null
+++ b/tests/unit/CompletionSuggesterTest.php
@@ -0,0 +1,221 @@
+<?php
+
+namespace CirrusSearch;
+
+use \CirrusSearch\Test\HashSearchConfig;
+use \CirrusSearch\Test\DummyConnection;
+use \CirrusSearch\BuildDocument\SuggestBuilder;
+
+/**
+ * Completion Suggester Tests
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+class CompletionSuggesterTest extends \PHPUnit_Framework_TestCase {
+
+ /**
+ * @dataProvider provideQueries
+ */
+ public function testQueries( $config, $limit, $search, $variants,
$expectedProfiles, $expectedQueries ) {
+ $completion = new MyCompletionSuggester( new HashSearchConfig(
$config ), $limit );
+ list( $profiles, $suggest ) = $completion->testBuildQuery(
$search, $variants );
+ $this->assertEquals( $expectedProfiles, $profiles );
+ $this->assertEquals( $expectedQueries, $suggest );
+ }
+
+
+ public function provideQueries() {
+ $simpleProfile = array(
+ 'plain' => array(
+ 'field' => 'suggest',
+ 'min_query_len' => 0,
+ 'discount' => 1.0,
+ 'fetch_limit_factor' => 2,
+ ),
+ );
+
+ $simpleFuzzy = $simpleProfile + array(
+ 'plain-fuzzy' => array(
+ 'field' => 'suggest',
+ 'min_query_len' => 0,
+ 'fuzzy' => array(
+ 'fuzzyness' => 'AUTO',
+ 'prefix_length' => 1,
+ 'unicode_aware' => true,
+ ),
+ 'discount' => 0.5,
+ 'fetch_limit_factor' => 1.5
+ )
+ );
+
+ return array(
+ "simple" => array(
+ array( 'CirrusSearchCompletionSettings' =>
$simpleProfile ),
+ 10,
+ ' complete me ',
+ null,
+ $simpleProfile, // The profile remains
unmodified here
+ array(
+ 'plain' => array(
+ 'text' => 'complete me ', //
keep trailing white spaces
+ 'completion' => array(
+ 'field' => 'suggest',
+ 'size' => 20, // effect
of fetch_limit_factor
+ ),
+ ),
+ ),
+ ),
+ "simple with fuzzy" => array(
+ array( 'CirrusSearchCompletionSettings' =>
$simpleFuzzy ),
+ 10,
+ ' complete me ',
+ null,
+ $simpleFuzzy, // The profiles remains
unmodified here
+ array(
+ 'plain' => array(
+ 'text' => 'complete me ', //
keep trailing white spaces
+ 'completion' => array(
+ 'field' => 'suggest',
+ 'size' => 20, // effect
of fetch_limit_factor
+ ),
+ ),
+ 'plain-fuzzy' => array(
+ 'text' => 'complete me ', //
keep trailing white spaces
+ 'completion' => array(
+ 'field' => 'suggest',
+ 'size' => 15.0, //
effect of fetch_limit_factor
+ // fuzzy config is
simply copied from the profile
+ 'fuzzy' => array(
+ 'fuzzyness' =>
'AUTO',
+ 'prefix_length'
=> 1,
+ 'unicode_aware'
=> true,
+ ),
+ ),
+ ),
+ ),
+ ),
+ "simple with variants" => array(
+ array( 'CirrusSearchCompletionSettings' =>
$simpleProfile ),
+ 10,
+ ' complete me ',
+ array( ' variant1 ', ' complete me ', '
variant2 ' ),
+ // Profile is updated with extra variant setup
+ // to include an extra discount
+ // ' complete me ' variant duplicate will be
ignored
+ $simpleProfile + array(
+ 'plain-variant-1' => array(
+ 'field' => 'suggest',
+ 'min_query_len' => 0,
+ 'discount' => 1.0 *
CompletionSuggester::VARIANT_EXTRA_DISCOUNT,
+ 'fetch_limit_factor' => 2,
+ 'fallback' => true, // extra
key added, not used for now
+ ),
+ 'plain-variant-2' => array(
+ 'field' => 'suggest',
+ 'min_query_len' => 0,
+ 'discount' => 1.0 *
(CompletionSuggester::VARIANT_EXTRA_DISCOUNT/2),
+ 'fetch_limit_factor' => 2,
+ 'fallback' => true, // extra
key added, not used for now
+ )
+ ),
+ array(
+ 'plain' => array(
+ 'text' => 'complete me ', //
keep trailing white spaces
+ 'completion' => array(
+ 'field' => 'suggest',
+ 'size' => 20, // effect
of fetch_limit_factor
+ ),
+ ),
+ 'plain-variant-1' => array(
+ 'text' => 'variant1 ',
+ 'completion' => array(
+ 'field' => 'suggest',
+ 'size' => 20, // effect
of fetch_limit_factor
+ ),
+ ),
+ 'plain-variant-2' => array(
+ 'text' => 'variant2 ',
+ 'completion' => array(
+ 'field' => 'suggest',
+ 'size' => 20, // effect
of fetch_limit_factor
+ ),
+ ),
+ ),
+ ),
+ );
+ }
+
+ /**
+ * @dataProvider provideMinMaxQueries
+ */
+ public function testMinMaxDefaultProfile( $len, $query ) {
+ global $wgCirrusSearchCompletionProfiles;
+ $config = array( 'CirrusSearchCompletionSettings' =>
$wgCirrusSearchCompletionProfiles['default'] );
+ // Test that we generate at most 4 profiles
+ $completion = new MyCompletionSuggester( new HashSearchConfig(
$config ), 1 );
+ list( $profiles, $suggest ) = $completion->testBuildQuery(
$query, array() );
+ // Unused profiles are kept
+ $this->assertEquals( count(
$wgCirrusSearchCompletionProfiles['default'] ), count( $profiles ) );
+ // Never run more than 4 suggest query (without variants)
+ $this->assertTrue( count( $suggest ) <= 4 );
+ // small queries
+ $this->assertTrue( count( $suggest ) >= 2 );
+
+ if ( $len < 3 ) {
+ // We do not run fuzzy for small queries
+ $this->assertEquals( 2, count( $suggest ) );
+ foreach( $suggest as $key => $value ) {
+ $this->assertArrayNotHasKey( 'fuzzy', $value );
+ }
+ }
+ foreach( $suggest as $key => $value ) {
+ // Make sure the query is truncated otherwise elastic
won't send results
+ $this->assertTrue( mb_strlen( $value['text'] ) <
SuggestBuilder::MAX_INPUT_LENGTH );
+ }
+ foreach( array_keys( $suggest ) as $sug ) {
+ // Makes sure we have the corresponding profile
+ $this->assertArrayHasKey( $sug, $profiles );
+ }
+ }
+
+ public function provideMinMaxQueries() {
+ $queries = array();
+ // The completion should not count extra spaces
+ // This is to avoid enbling costly fuzzy profiles
+ // by cheating with spaces
+ $query = ' ';
+ for( $i = 0; $i < 100; $i++ ) {
+ $test = "Query length {$i}";
+ $queries[$test] = array( $i, $query . ' ' );
+ $query .= '';
+ }
+ return $queries;
+ }
+}
+
+/**
+ * No package visibility in with PHP so we have to subclass...
+ */
+class MyCompletionSuggester extends CompletionSuggester {
+ public function __construct( SearchConfig $config, $limit ) {
+ parent::__construct( new DummyConnection(), $limit, $config,
array( NS_MAIN ), null, "dummy" );
+ }
+
+ public function testBuildQuery( $search, $variants ) {
+ $this->setTermAndVariants( $search, $variants );
+ return $this->buildQuery();
+ }
+}
diff --git a/tests/unit/RescoreBuilderTest.php
b/tests/unit/RescoreBuilderTest.php
index a57bf00..4b349f2 100644
--- a/tests/unit/RescoreBuilderTest.php
+++ b/tests/unit/RescoreBuilderTest.php
@@ -2,6 +2,8 @@
namespace CirrusSearch\Search;
+use CirrusSearch\Test\HashSearchConfig;
+
class RescoreBuilderTest extends \PHPUnit_Framework_TestCase {
public function testFunctionScoreDecorator() {
$func = new FunctionScoreDecorator();
@@ -501,11 +503,5 @@
),
),
);
- }
-}
-
-class HashSearchConfig extends \CirrusSearch\SearchConfig {
- public function __construct( array $settings ) {
- $this->setSource( new \HashConfig( $settings ) );
}
}
diff --git a/tests/unit/Search/SearchSuggestionSetTest.php
b/tests/unit/Search/SearchSuggestionSetTest.php
index 54062da..8b502a4 100644
--- a/tests/unit/Search/SearchSuggestionSetTest.php
+++ b/tests/unit/Search/SearchSuggestionSetTest.php
@@ -29,18 +29,18 @@
public function testAppend() {
$set = SearchSuggestionSet::emptySuggestionSet();
$this->assertEquals( 0, $set->getSize() );
- $set->addSuggestion( new SearchSuggestion( null, null, 3 ) );
+ $set->append( new SearchSuggestion( 3 ) );
$this->assertEquals( 3, $set->getWorstScore() );
$this->assertEquals( 3, $set->getBestScore() );
- $suggestion = new SearchSuggestion( null, null, 4 );
- $set->addSuggestion( $suggestion );
+ $suggestion = new SearchSuggestion( 4 );
+ $set->append( $suggestion );
$this->assertEquals( 2, $set->getWorstScore() );
$this->assertEquals( 3, $set->getBestScore() );
$this->assertEquals( 2, $suggestion->getScore() );
- $suggestion = new SearchSuggestion( null, null, 2 );
- $set->addSuggestion( $suggestion );
+ $suggestion = new SearchSuggestion( 2 );
+ $set->append( $suggestion );
$this->assertEquals( 1, $set->getWorstScore() );
$this->assertEquals( 3, $set->getBestScore() );
$this->assertEquals( 1, $suggestion->getScore() );
@@ -58,24 +58,24 @@
public function testInsertBest() {
$set = SearchSuggestionSet::emptySuggestionSet();
$this->assertEquals( 0, $set->getSize() );
- $set->insertBestSuggestion( new SearchSuggestion( null, null, 3
) );
+ $set->prepend( new SearchSuggestion( 3 ) );
$this->assertEquals( 3, $set->getWorstScore() );
$this->assertEquals( 3, $set->getBestScore() );
- $suggestion = new SearchSuggestion( null, null, 4 );
- $set->insertBestSuggestion( $suggestion );
+ $suggestion = new SearchSuggestion( 4 );
+ $set->prepend( $suggestion );
$this->assertEquals( 3, $set->getWorstScore() );
$this->assertEquals( 4, $set->getBestScore() );
$this->assertEquals( 4, $suggestion->getScore() );
- $suggestion = new SearchSuggestion( null, null, null );
- $set->insertBestSuggestion( $suggestion );
+ $suggestion = new SearchSuggestion( 0 );
+ $set->prepend( $suggestion );
$this->assertEquals( 3, $set->getWorstScore() );
$this->assertEquals( 5, $set->getBestScore() );
$this->assertEquals( 5, $suggestion->getScore() );
- $suggestion = new SearchSuggestion( null, null, 2 );
- $set->insertBestSuggestion( $suggestion );
+ $suggestion = new SearchSuggestion( 2 );
+ $set->prepend( $suggestion );
$this->assertEquals( 3, $set->getWorstScore() );
$this->assertEquals( 6, $set->getBestScore() );
$this->assertEquals( 6, $suggestion->getScore() );
@@ -89,7 +89,7 @@
public function testShrink() {
$set = SearchSuggestionSet::emptySuggestionSet();
for( $i = 0; $i < 100; $i++) {
- $set->addSuggestion( new SearchSuggestion() );
+ $set->append( new SearchSuggestion( 0 ) );
}
$set->shrink( 10 );
$this->assertEquals( 10, $set->getSize() );
@@ -97,4 +97,6 @@
$set->shrink( 0 );
$this->assertEquals( 0, $set->getSize() );
}
+
+ // TODO: test for fromTitles
}
diff --git a/tests/unit/TestUtils.php b/tests/unit/TestUtils.php
new file mode 100644
index 0000000..4db92b4
--- /dev/null
+++ b/tests/unit/TestUtils.php
@@ -0,0 +1,13 @@
+<?php
+namespace CirrusSearch\Test;
+
+class HashSearchConfig extends \CirrusSearch\SearchConfig {
+ public function __construct( array $settings ) {
+ $this->setSource( new \HashConfig( $settings ) );
+ }
+}
+
+class DummyConnection extends \CirrusSearch\Connection {
+ public function __construct() {
+ }
+}
diff --git a/tests/unit/UserTestingTest.php b/tests/unit/UserTestingTest.php
index c8c9b7c..4133e7d 100644
--- a/tests/unit/UserTestingTest.php
+++ b/tests/unit/UserTestingTest.php
@@ -23,6 +23,14 @@
* http://www.gnu.org/copyleft/gpl.html
*/
class UserTestingTest extends \MediaWikiTestCase {
+ /**
+ * @beforeClass
+ */
+ public static function setUpBeforeClass() {
+ ElasticSearchIntermediary::resetExecutionId();
+ UserTesting::resetInstance();
+ }
+
public function testPartitipcationInTest() {
$config = $this->config( 'test' );
$ut = $this->ut( $config, true );
--
To view, visit https://gerrit.wikimedia.org/r/258417
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Iab87083841481107daf8048ddfabef3c8f284f79
Gerrit-PatchSet: 16
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Smalyshev <[email protected]>
Gerrit-Reviewer: Cindy-the-browser-test-bot <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
Gerrit-Reviewer: EBernhardson <[email protected]>
Gerrit-Reviewer: Manybubbles <[email protected]>
Gerrit-Reviewer: Smalyshev <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits