jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/334729 )

Change subject: Deploy TextCat Improvements
......................................................................


Deploy TextCat Improvements

Use multiple language model directories.
Add generic optimized TextCat config.
Add specific TextCat config for Dutch.
Alphabetize configs.
Change default TextCat Languages to []; explicitly define those for enwiki.

Bug: T149324
Bug: T142140
Change-Id: I447463447aed11b0db6ce9507448012705ef9293
Depends-On: I20a82978aa7a046f885dfbdcbee93d4a13f71101
---
M wmf-config/CirrusSearch-common.php
M wmf-config/InitialiseSettings.php
2 files changed, 50 insertions(+), 27 deletions(-)

Approvals:
  Thcipriani: Looks good to me, approved
  EBernhardson: Looks good to me, but someone else must approve
  jenkins-bot: Verified



diff --git a/wmf-config/CirrusSearch-common.php b/wmf-config/CirrusSearch-common.php
index 4346d94..08f99e8 100644
--- a/wmf-config/CirrusSearch-common.php
+++ b/wmf-config/CirrusSearch-common.php
@@ -197,7 +197,16 @@
 $wgCirrusSearchEnableAltLanguage = $wmgCirrusSearchEnableAltLanguage;
 $wgCirrusSearchLanguageDetectors = $wmgCirrusSearchLanguageDetectors;
 $wgCirrusSearchTextcatLanguages = $wmgCirrusSearchTextcatLanguages;
-$wgCirrusSearchTextcatModel = "$IP/vendor/wikimedia/textcat/LM-query";
+$wgCirrusSearchTextcatModel = [ "$IP/vendor/wikimedia/textcat/LM-query", "$IP/vendor/wikimedia/textcat/LM" ];
+$wgCirrusSearchTextcatConfig = [
+       'maxNgrams' => 9000,
+       'maxReturnedLanguages' => 1,
+       'resultsRatio' => 1.06,
+       'minInputLength' => 3,
+       'maxProportion' => 0.85,
+       'langBoostScore' => 0.14,
+       'numBoostedLangs' => 2,
+];
 
 $wgHooks['CirrusSearchMappingConfig'][] = function( array &$config, $mappingConfigBuilder ) {
        $config['page']['properties']['popularity_score'] = [
diff --git a/wmf-config/InitialiseSettings.php b/wmf-config/InitialiseSettings.php
index 6b76326..50147ce 100644
--- a/wmf-config/InitialiseSettings.php
+++ b/wmf-config/InitialiseSettings.php
@@ -17267,14 +17267,15 @@
 
 'wmgCirrusSearchLanguageDetectors' => [
        'default' => [],
-       'enwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
        'dewiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+       'enwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
        'eswiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
-       'itwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
        'frwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+       'itwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+       'jawiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
+       'nlwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
        'ptwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
        'ruwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
-       'jawiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ],
 ],
 
 // Enable interwiki search by language detection. The list of language
@@ -17285,44 +17286,57 @@
 // bucket reasons).
 'wmgCirrusSearchEnableAltLanguage' => [
        'default' => false,
-       'enwiki' => true,
        'dewiki' => true,
+       'enwiki' => true,
        'eswiki' => true,
-       'itwiki' => true,
        'frwiki' => true,
+       'itwiki' => true,
+       'jawiki' => true,
+       'nlwiki' => true,
        'ptwiki' => true,
        'ruwiki' => true,
-       'jawiki' => true,
 ],
 
 'wmgCirrusSearchTextcatLanguages' => [
-       'default' => [
-               'en', 'es', 'zh', 'pt', 'ar', 'ru', 'fa', 'ko',
-               'bn', 'bg', 'hi', 'el', 'ja', 'ta', 'th', 'he'
+       'default' => [],
+       'dewiki' => [
+               'de', 'en', 'la', 'it', 'es', 'fr', 'zh', 'pl',
+               'vi', 'el', 'ru', 'ar', 'hi', 'th', 'ko', 'ja',
        ],
-       'frwiki' => [
-               'fr', 'en', 'ar', 'ru', 'zh', 'th', 'el', 'hy',
-               'he', 'ko',
+       'enwiki' => [
+               'en', 'zh', 'es', 'ar', 'de', 'fa', 'fr', 'id',
+               'pl', 'ru', 'vi', 'it', 'ja', 'pt', 'cs', 'bn',
+               'hr', 'he', 'no', 'af', 'is', 'tl', 'th', 'hu',
+               'ga', 'ko', 'uk', 'ur', 'hi', 'el', 'te', 'ka',
        ],
        'eswiki' => [
-               'es', 'en', 'ru', 'zh', 'ar', 'ja',
+               'es', 'en', 'la', 'ru', 'zh', 'pt', 'it', 'fr',
+               'de', 'ar', 'ja',
+       ],
+       'frwiki' => [
+               'fr', 'en', 'ar', 'pt', 'de', 'es', 'ru', 'zh',
+               'nl', 'pl', 'it', 'th', 'sv', 'la', 'is', 'hy',
+               'hu', 'br', 'el', 'he', 'ko',
        ],
        'itwiki' => [
-               'it', 'en', 'ru', 'ar', 'zh', 'ja', 'el', 'ko',
-       ],
-       'dewiki' => [
-               'de', 'en', 'zh', 'el', 'ru', 'ar', 'hi', 'th',
-               'ko', 'ja',
-       ],
-       'ptwiki' => [
-               'pt', 'en', 'ru', 'he', 'ar', 'zh', 'ko', 'el',
-       ],
-       'ruwiki' => [
-               'ru', 'en', 'uk', 'ka', 'hy', 'ja', 'ar', 'he',
-               'zh',
+               'it', 'en', 'de', 'ru', 'ar', 'zh', 'pl', 'el',
+               'ko',
        ],
        'jawiki' => [
-               'ja', 'en', 'ru', 'ko', 'ar', 'he',
+               'ja', 'en', 'zh', 'ko', 'de', 'ar', 'he',
+       ],
+       'nlwiki' => [
+               'nl', 'en', 'fr', 'de', 'es', 'la', 'zh', 'pl',
+               'ar', 'vi', 'pt', 'my', 'ko', 'hr', 'da', 'cs',
+               'el', 'he', 'ja', 'ru',
+       ],
+       'ptwiki' => [
+               'pt', 'en', 'tl', 'ru', 'fr', 'he', 'ar', 'zh',
+               'ko', 'el',
+       ],
+       'ruwiki' => [
+               'ru', 'en', 'uk', 'de', 'ka', 'hy', 'lv', 'ja',
+               'fi', 'es', 'ar', 'he', 'zh',
        ],
 ],
 

-- 
To view, visit https://gerrit.wikimedia.org/r/334729
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I447463447aed11b0db6ce9507448012705ef9293
Gerrit-PatchSet: 5
Gerrit-Project: operations/mediawiki-config
Gerrit-Branch: master
Gerrit-Owner: Tjones <tjo...@wikimedia.org>
Gerrit-Reviewer: DCausse <dcau...@wikimedia.org>
Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org>
Gerrit-Reviewer: Florianschmidtwelzow <florian.schmidt.stargatewis...@gmail.com>
Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org>
Gerrit-Reviewer: Thcipriani <tcipri...@wikimedia.org>
Gerrit-Reviewer: Tjones <tjo...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to