Chad has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/140866

Change subject: Prefer articles in a user's language on multilingual wikis
......................................................................

Prefer articles in a user's language on multilingual wikis

Bug: 66829
Change-Id: I52402fb7e2d3c3ee56c64322c13dbec1e92c52d0
---
M CirrusSearch.php
M includes/BuildDocument/PageDataBuilder.php
M includes/ElasticsearchIntermediary.php
M includes/MappingConfigBuilder.php
M includes/Searcher.php
5 files changed, 27 insertions(+), 11 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/66/140866/1

diff --git a/CirrusSearch.php b/CirrusSearch.php
index da9ac57..aa5dcf2 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -239,6 +239,11 @@
 // Default weight of a talk namespace relative to its corresponding non-talk 
namespace.
 $wgCirrusSearchTalkNamespaceWeight = 0.25;
 
+// Default weight of language fields for multilingual wikis.
+// If your wiki is only one language you can leave this at 0, otherwise try 
setting it
+// to something like 2.0
+$wgCirrusSearchLanguageWeight = 0.0;
+
 // Portion of an article's score that decays with time since its last update. 
 Defaults to 0
 // meaning don't decay the score at all unless prefer-recent: prefixes the 
query.
 $wgCirrusSearchPreferRecentDefaultDecayPortion = 0;
diff --git a/includes/BuildDocument/PageDataBuilder.php 
b/includes/BuildDocument/PageDataBuilder.php
index f384174..26409d8 100644
--- a/includes/BuildDocument/PageDataBuilder.php
+++ b/includes/BuildDocument/PageDataBuilder.php
@@ -45,6 +45,10 @@
                                $this->templates();
                }
 
+               // All content types have a language
+               $this->doc->add( 'language',
+                       $this->title->getPageLanguage()->getCode() );
+
                return $this->doc;
        }
 
diff --git a/includes/ElasticsearchIntermediary.php 
b/includes/ElasticsearchIntermediary.php
index 4e5aae6..ba959d8 100644
--- a/includes/ElasticsearchIntermediary.php
+++ b/includes/ElasticsearchIntermediary.php
@@ -24,10 +24,10 @@
  */
 class ElasticsearchIntermediary {
        /**
-        * @var string|null the name or ip of the user for which we're 
performing this search or null in the case of
+        * @var User|null user for which we're performing this search or null 
in the case of
         * requests kicked off by jobs
         */
-       private $user = 'nobody';
+       protected $user;
        /**
         * @var float|null start time of current request or null if none is 
running
         */
@@ -58,9 +58,7 @@
         * slow.  0 means none count as slow.
         */
        protected function __construct( $user, $slowSeconds ) {
-               if ( $user ) {
-                       $this->user = 'User:' . $user->getName(); // name is 
the ip address of anonymous users
-               }
+               $this->user = $user;
                $this->slowMillis = round( 1000 * $slowSeconds );
        }
 
@@ -154,9 +152,7 @@
                // Now log and clear our state.
                wfDebugLog( 'CirrusSearchRequests', $logMessage );
                if ( $this->slowMillis && $took >= $this->slowMillis ) {
-                       if ( $this->user ) {
-                               $logMessage .= " for $this->user";
-                       }
+                       $logMessage .= $this->user ? ' for ' . 
$this->user->getName() : '';
                        wfDebugLog( 'CirrusSearchSlowRequests', $logMessage );
                }
                $this->requestStart = null;
diff --git a/includes/MappingConfigBuilder.php 
b/includes/MappingConfigBuilder.php
index e771a2f..5b3d17b 100644
--- a/includes/MappingConfigBuilder.php
+++ b/includes/MappingConfigBuilder.php
@@ -33,7 +33,7 @@
         * and change the minor version when it changes but isn't
         * incompatible
         */
-       const VERSION = '1.3';
+       const VERSION = '1.4';
 
        /**
         * Whether to allow prefix searches to match on any word
@@ -145,7 +145,8 @@
                                'suggest' => array(
                                        'type' => 'string',
                                        'analyzer' => 'suggest',
-                               )
+                               ),
+                               'language' => $this->buildKeywordField(),
                        ),
                );
                wfRunHooks( 'CirrusSearchMappingConfig', array( &$config, $this 
) );
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 0b010d6..77e0767 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -1197,7 +1197,8 @@
         * If there is any boosting to be done munge the current query to 
get it right.
         */
        private function installBoosts() {
-               global $wgCirrusSearchFunctionRescoreWindowSize;
+               global $wgCirrusSearchFunctionRescoreWindowSize,
+                       $wgCirrusSearchLanguageWeight;
 
                // Quick note:  At the moment ".isEmpty()" is _much_ faster 
than ".empty".  Never
                // use ".empty".  See 
https://github.com/elasticsearch/elasticsearch/issues/5086
@@ -1265,6 +1266,15 @@
                        }
                }
 
+               // Boost pages in a user's language
+               if ( $this->user && $wgCirrusSearchLanguageWeight ) {
+                       $match = new \Elastica\Query\Match();
+                       $match->setFieldQuery( 'language', 
$this->user->getOption( 'language' ) );
+                       $functionScore->addBoostFactorFunction( 
$wgCirrusSearchLanguageWeight,
+                               new \Elastica\Filter\Query( $match ) );
+                       $useFunctionScore = true;
+               }
+
                if ( !$useFunctionScore ) {
                        // Nothing to do
                        return;

-- 
To view, visit https://gerrit.wikimedia.org/r/140866
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I52402fb7e2d3c3ee56c64322c13dbec1e92c52d0
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Chad <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to