Manybubbles has uploaded a new change for review. https://gerrit.wikimedia.org/r/79791
Change subject: Default to more accurate but slower search_type. ...................................................................... Default to more accurate but slower search_type. If performing a full text search (the only search where term frequency is currently important) then ask elasticsearch to make sure it uses accurate numbers for the term frequency if configured to do so. Default to configuring to do so. Bug 53039. Change-Id: I50c3a8538d120dd5db83d2c40b2365d69324f275 --- M CirrusSearch.body.php M CirrusSearch.php 2 files changed, 12 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/91/79791/1 diff --git a/CirrusSearch.body.php b/CirrusSearch.body.php index c4dfdca..4542c98 100644 --- a/CirrusSearch.body.php +++ b/CirrusSearch.body.php @@ -147,6 +147,7 @@ public function searchText( $term ) { wfDebugLog( 'CirrusSearch', "Searching: $term" ); global $wgCirrusSearchPhraseSuggestMaxErrors, $wgCirrusSearchPhraseSuggestConfidence; + global $wgCirrusSearchMoreAccurateScoringMode; $originalTerm = $term; @@ -157,6 +158,7 @@ $query = new Elastica\Query(); $query->setFields( array( 'id', 'title', 'namespace', 'redirect' ) ); + $queryOptions = array(); $filters = array(); @@ -261,13 +263,16 @@ ) ) )); + if ( $wgCirrusSearchMoreAccurateScoringMode ) { + $queryOptions[ 'search_type' ] = 'dfs_query_then_fetch'; + } } // Perform the search $work = new PoolCounterWorkViaCallback( 'CirrusSearch-Search', "_elasticsearch", array( - 'doWork' => function() use ( $indexType, $originalTerm, $query ) { + 'doWork' => function() use ( $indexType, $originalTerm, $query, $queryOptions ) { try { - $result = CirrusSearch::getPageType( $indexType )->search( $query ); + $result = CirrusSearch::getPageType( $indexType )->search( $query, $queryOptions ); wfDebugLog( 'CirrusSearch', 'Search completed in ' . $result->getTotalTime() . 
' millis' ); return $result; } catch ( \Elastica\Exception\ExceptionInterface $e ) { diff --git a/CirrusSearch.php b/CirrusSearch.php index 312cc4d..c8e5a64 100644 --- a/CirrusSearch.php +++ b/CirrusSearch.php @@ -43,6 +43,11 @@ // Number of replicas per shard for each index $wgCirrusSearchContentReplicaCount = array( 'content' => 1, 'general' => 1 ); +// If true CirrusSearch asks Elasticsearch to perform searches using a mode that should +// produce more accurate results at the cost of performance. See this for more info: +// http://www.elasticsearch.org/blog/understanding-query-then-fetch-vs-dfs-query-then-fetch/ +$wgCirrusSearchMoreAccurateScoringMode = true; + // Maximum number of terms that we ask phrase suggest to correct. // See max_errors on http://www.elasticsearch.org/guide/reference/api/search/suggest/ $wgCirrusSearchPhraseSuggestMaxErrors = 5; -- To view, visit https://gerrit.wikimedia.org/r/79791 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I50c3a8538d120dd5db83d2c40b2365d69324f275 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: Manybubbles <never...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits