jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/334729 )
Change subject: Deploy TextCat Improvements ...................................................................... Deploy TextCat Improvements Use multiple language model directories. Add generic optimized TextCat config. Add specific TextCat config for Dutch. Alphabetize configs. Change default TextCat Languages to []; explicitly define those for enwiki. Bug: T149324 Bug: T142140 Change-Id: I447463447aed11b0db6ce9507448012705ef9293 Depends-On: I20a82978aa7a046f885dfbdcbee93d4a13f71101 --- M wmf-config/CirrusSearch-common.php M wmf-config/InitialiseSettings.php 2 files changed, 50 insertions(+), 27 deletions(-) Approvals: Thcipriani: Looks good to me, approved EBernhardson: Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/wmf-config/CirrusSearch-common.php b/wmf-config/CirrusSearch-common.php index 4346d94..08f99e8 100644 --- a/wmf-config/CirrusSearch-common.php +++ b/wmf-config/CirrusSearch-common.php @@ -197,7 +197,16 @@ $wgCirrusSearchEnableAltLanguage = $wmgCirrusSearchEnableAltLanguage; $wgCirrusSearchLanguageDetectors = $wmgCirrusSearchLanguageDetectors; $wgCirrusSearchTextcatLanguages = $wmgCirrusSearchTextcatLanguages; -$wgCirrusSearchTextcatModel = "$IP/vendor/wikimedia/textcat/LM-query"; +$wgCirrusSearchTextcatModel = [ "$IP/vendor/wikimedia/textcat/LM-query", "$IP/vendor/wikimedia/textcat/LM" ]; +$wgCirrusSearchTextcatConfig = [ + 'maxNgrams' => 9000, + 'maxReturnedLanguages' => 1, + 'resultsRatio' => 1.06, + 'minInputLength' => 3, + 'maxProportion' => 0.85, + 'langBoostScore' => 0.14, + 'numBoostedLangs' => 2, +]; $wgHooks['CirrusSearchMappingConfig'][] = function( array &$config, $mappingConfigBuilder ) { $config['page']['properties']['popularity_score'] = [ diff --git a/wmf-config/InitialiseSettings.php b/wmf-config/InitialiseSettings.php index 6b76326..50147ce 100644 --- a/wmf-config/InitialiseSettings.php +++ b/wmf-config/InitialiseSettings.php @@ -17267,14 +17267,15 @@ 'wmgCirrusSearchLanguageDetectors' => [ 'default' => [], - 'enwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ], 'dewiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ], + 'enwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ], 'eswiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ], - 'itwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ], 'frwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ], + 'itwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ], + 'jawiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ], + 'nlwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ], 'ptwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ], 'ruwiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ], - 'jawiki' => [ 'textcat' => 'CirrusSearch\\LanguageDetector\\TextCat' ], ], // Enable interwiki search by language detection. The list of language @@ -17285,44 +17286,57 @@ // bucket reasons). 'wmgCirrusSearchEnableAltLanguage' => [ 'default' => false, - 'enwiki' => true, 'dewiki' => true, + 'enwiki' => true, 'eswiki' => true, - 'itwiki' => true, 'frwiki' => true, + 'itwiki' => true, + 'jawiki' => true, + 'nlwiki' => true, 'ptwiki' => true, 'ruwiki' => true, - 'jawiki' => true, ], 'wmgCirrusSearchTextcatLanguages' => [ - 'default' => [ - 'en', 'es', 'zh', 'pt', 'ar', 'ru', 'fa', 'ko', - 'bn', 'bg', 'hi', 'el', 'ja', 'ta', 'th', 'he' + 'default' => [], + 'dewiki' => [ + 'de', 'en', 'la', 'it', 'es', 'fr', 'zh', 'pl', + 'vi', 'el', 'ru', 'ar', 'hi', 'th', 'ko', 'ja', ], - 'frwiki' => [ - 'fr', 'en', 'ar', 'ru', 'zh', 'th', 'el', 'hy', - 'he', 'ko', + 'enwiki' => [ + 'en', 'zh', 'es', 'ar', 'de', 'fa', 'fr', 'id', + 'pl', 'ru', 'vi', 'it', 'ja', 'pt', 'cs', 'bn', + 'hr', 'he', 'no', 'af', 'is', 'tl', 'th', 'hu', + 'ga', 'ko', 'uk', 'ur', 'hi', 'el', 'te', 'ka', ], 'eswiki' => [ - 'es', 'en', 'ru', 'zh', 'ar', 'ja', + 'es', 'en', 'la', 'ru', 'zh', 'pt', 'it', 'fr', + 'de', 'ar', 'ja', + ], + 'frwiki' => [ + 'fr', 'en', 'ar', 'pt', 'de', 'es', 'ru', 'zh', + 'nl', 'pl', 'it', 'th', 'sv', 'la', 'is', 'hy', + 'hu', 'br', 'el', 'he', 'ko', ], 'itwiki' => [ - 'it', 'en', 'ru', 'ar', 'zh', 'ja', 'el', 'ko', - ], - 'dewiki' => [ - 'de', 'en', 'zh', 'el', 'ru', 'ar', 'hi', 'th', - 'ko', 'ja', - ], - 'ptwiki' => [ - 'pt', 'en', 'ru', 'he', 'ar', 'zh', 'ko', 'el', - ], - 'ruwiki' => [ - 'ru', 'en', 'uk', 'ka', 'hy', 'ja', 'ar', 'he', - 'zh', + 'it', 'en', 'de', 'ru', 'ar', 'zh', 'pl', 'el', + 'ko', ], 'jawiki' => [ - 'ja', 'en', 'ru', 'ko', 'ar', 'he', + 'ja', 'en', 'zh', 'ko', 'de', 'ar', 'he', + ], + 'nlwiki' => [ + 'nl', 'en', 'fr', 'de', 'es', 'la', 'zh', 'pl', + 'ar', 'vi', 'pt', 'my', 'ko', 'hr', 'da', 'cs', + 'el', 'he', 'ja', 'ru', + ], + 'ptwiki' => [ + 'pt', 'en', 'tl', 'ru', 'fr', 'he', 'ar', 'zh', + 'ko', 'el', + ], + 'ruwiki' => [ + 'ru', 'en', 'uk', 'de', 'ka', 'hy', 'lv', 'ja', + 'fi', 'es', 'ar', 'he', 'zh', ], ], -- To view, visit https://gerrit.wikimedia.org/r/334729 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I447463447aed11b0db6ce9507448012705ef9293 Gerrit-PatchSet: 5 Gerrit-Project: operations/mediawiki-config Gerrit-Branch: master Gerrit-Owner: Tjones <tjo...@wikimedia.org> Gerrit-Reviewer: DCausse <dcau...@wikimedia.org> Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: Florianschmidtwelzow <florian.schmidt.stargatewis...@gmail.com> Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org> Gerrit-Reviewer: Thcipriani <tcipri...@wikimedia.org> Gerrit-Reviewer: Tjones <tjo...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits