Chad has uploaded a new change for review.
https://gerrit.wikimedia.org/r/140866
Change subject: Prefer articles in a user's language on multilingual wikis
......................................................................
Prefer articles in a user's language on multilingual wikis
Bug: 66829
Change-Id: I52402fb7e2d3c3ee56c64322c13dbec1e92c52d0
---
M CirrusSearch.php
M includes/BuildDocument/PageDataBuilder.php
M includes/ElasticsearchIntermediary.php
M includes/MappingConfigBuilder.php
M includes/Searcher.php
5 files changed, 27 insertions(+), 11 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/66/140866/1
diff --git a/CirrusSearch.php b/CirrusSearch.php
index da9ac57..aa5dcf2 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -239,6 +239,11 @@
// Default weight of a talk namespace relative to its corresponding non-talk namespace.
$wgCirrusSearchTalkNamespaceWeight = 0.25;
+// Default weight of language fields for multilingual wikis.
+// If your wiki is only one language you can leave this at 0, otherwise try setting it
+// to something like 2.0
+$wgCirrusSearchLanguageWeight = 0.0;
+
// Portion of an article's score that decays with time since it's last update. Defaults to 0
// meaning don't decay the score at all unless prefer-recent: prefixes the query.
$wgCirrusSearchPreferRecentDefaultDecayPortion = 0;
diff --git a/includes/BuildDocument/PageDataBuilder.php b/includes/BuildDocument/PageDataBuilder.php
index f384174..26409d8 100644
--- a/includes/BuildDocument/PageDataBuilder.php
+++ b/includes/BuildDocument/PageDataBuilder.php
@@ -45,6 +45,10 @@
$this->templates();
}
+ // All content types have a language
+ $this->doc->add( 'language',
+ $this->title->getPageLanguage()->getCode() );
+
return $this->doc;
}
diff --git a/includes/ElasticsearchIntermediary.php b/includes/ElasticsearchIntermediary.php
index 4e5aae6..ba959d8 100644
--- a/includes/ElasticsearchIntermediary.php
+++ b/includes/ElasticsearchIntermediary.php
@@ -24,10 +24,10 @@
*/
class ElasticsearchIntermediary {
/**
- * @var string|null the name or ip of the user for which we're performing this search or null in the case of
+ * @var User|null user for which we're performing this search or null in the case of
* requests kicked off by jobs
*/
- private $user = 'nobody';
+ protected $user;
/**
* @var float|null start time of current request or null if none is running
*/
@@ -58,9 +58,7 @@
* slow. 0 means none count as slow.
*/
protected function __construct( $user, $slowSeconds ) {
- if ( $user ) {
- $this->user = 'User:' . $user->getName(); // name is the ip address of anonymous users
- }
+ $this->user = $user;
$this->slowMillis = round( 1000 * $slowSeconds );
}
@@ -154,9 +152,7 @@
// Now log and clear our state.
wfDebugLog( 'CirrusSearchRequests', $logMessage );
if ( $this->slowMillis && $took >= $this->slowMillis ) {
- if ( $this->user ) {
- $logMessage .= " for $this->user";
- }
+ $logMessage .= $this->user ? ' for ' . $this->user->getName() : '';
wfDebugLog( 'CirrusSearchSlowRequests', $logMessage );
}
$this->requestStart = null;
diff --git a/includes/MappingConfigBuilder.php b/includes/MappingConfigBuilder.php
index e771a2f..5b3d17b 100644
--- a/includes/MappingConfigBuilder.php
+++ b/includes/MappingConfigBuilder.php
@@ -33,7 +33,7 @@
* and change the minor version when it changes but isn't
* incompatible
*/
- const VERSION = '1.3';
+ const VERSION = '1.4';
/**
* Whether to allow prefix searches to match on any word
@@ -145,7 +145,8 @@
'suggest' => array(
'type' => 'string',
'analyzer' => 'suggest',
- )
+ ),
+ 'language' => $this->buildKeywordField(),
),
);
wfRunHooks( 'CirrusSearchMappingConfig', array( &$config, $this ) );
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 0b010d6..77e0767 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -1197,7 +1197,8 @@
* If there is any boosting to be done munge the the current query to get it right.
*/
private function installBoosts() {
- global $wgCirrusSearchFunctionRescoreWindowSize;
+ global $wgCirrusSearchFunctionRescoreWindowSize,
+ $wgCirrusSearchLanguageWeight;
// Quick note: At the moment ".isEmpty()" is _much_ faster then ".empty". Never
// use ".empty". See https://github.com/elasticsearch/elasticsearch/issues/5086
@@ -1265,6 +1266,15 @@
}
}
+ // Boost pages in a user's language
+ if ( $this->user && $wgCirrusSearchLanguageWeight ) {
+ $match = new \Elastica\Query\Match();
+ $match->setFieldQuery( 'language', $this->user->getOption( 'language' ) );
+ $functionScore->addBoostFactorFunction( $wgCirrusSearchLanguageWeight,
+ new \Elastica\Filter\Query( $match ) );
+ $useFunctionScore = true;
+ }
+
if ( !$useFunctionScore ) {
// Nothing to do
return;
--
To view, visit https://gerrit.wikimedia.org/r/140866
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I52402fb7e2d3c3ee56c64322c13dbec1e92c52d0
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Chad <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits