Manybubbles has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/79791


Change subject: Default to more accurate but slower search_type.
......................................................................

Default to more accurate but slower search_type.

When performing a full text search (the only search where term frequency
currently matters), ask Elasticsearch to use accurate numbers for the
term frequency, if configured to do so. This behavior is enabled by
default.

Bug 53039.

Change-Id: I50c3a8538d120dd5db83d2c40b2365d69324f275
---
M CirrusSearch.body.php
M CirrusSearch.php
2 files changed, 12 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/91/79791/1

diff --git a/CirrusSearch.body.php b/CirrusSearch.body.php
index c4dfdca..4542c98 100644
--- a/CirrusSearch.body.php
+++ b/CirrusSearch.body.php
@@ -147,6 +147,7 @@
        public function searchText( $term ) {
                wfDebugLog( 'CirrusSearch', "Searching:  $term" );
                global $wgCirrusSearchPhraseSuggestMaxErrors, 
$wgCirrusSearchPhraseSuggestConfidence;
+               global $wgCirrusSearchMoreAccurateScoringMode;
                
                $originalTerm = $term;
 
@@ -157,6 +158,7 @@
 
                $query = new Elastica\Query();
                $query->setFields( array( 'id', 'title', 'namespace', 
'redirect' ) );
+               $queryOptions = array();
 
                $filters = array();
 
@@ -261,13 +263,16 @@
                                        )
                                )
                        ));
+                       if ( $wgCirrusSearchMoreAccurateScoringMode ) {
+                               $queryOptions[ 'search_type' ] = 
'dfs_query_then_fetch';
+                       }
                }
 
                // Perform the search
                $work = new PoolCounterWorkViaCallback( 'CirrusSearch-Search', 
"_elasticsearch", array(
-                       'doWork' => function() use ( $indexType, $originalTerm, 
$query ) {
+                       'doWork' => function() use ( $indexType, $originalTerm, 
$query, $queryOptions ) {
                                try {
-                                       $result = CirrusSearch::getPageType( 
$indexType )->search( $query );
+                                       $result = CirrusSearch::getPageType( 
$indexType )->search( $query, $queryOptions );
                                        wfDebugLog( 'CirrusSearch', 'Search 
completed in ' . $result->getTotalTime() . ' millis' );
                                        return $result;
                                } catch ( 
\Elastica\Exception\ExceptionInterface $e ) {
diff --git a/CirrusSearch.php b/CirrusSearch.php
index 312cc4d..c8e5a64 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -43,6 +43,11 @@
 // Number of replicas per shard for each index
 $wgCirrusSearchContentReplicaCount = array( 'content' => 1, 'general' => 1 );
 
+// If true CirrusSearch asks Elasticsearch to perform searches using a mode 
that should
+// produce more accurate results at the cost of performance. See this for more 
info:
+// 
http://www.elasticsearch.org/blog/understanding-query-then-fetch-vs-dfs-query-then-fetch/
+$wgCirrusSearchMoreAccurateScoringMode = true;
+
 // Maximum number of terms that we ask phrase suggest to correct.
 // See max_errors on 
http://www.elasticsearch.org/guide/reference/api/search/suggest/
 $wgCirrusSearchPhraseSuggestMaxErrors = 5;

-- 
To view, visit https://gerrit.wikimedia.org/r/79791
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I50c3a8538d120dd5db83d2c40b2365d69324f275
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <never...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to