Thcipriani has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/307350

Change subject: Revert "Revert "CirrusSearch BM25 A/B test config""
......................................................................

Revert "Revert "CirrusSearch BM25 A/B test config""

This reverts commit 15e4dcafcbc053de433767181a601bd1e8b7ec5a.

Change-Id: Ia26197e9b7c6e496d8d7e21b629bb0725974286b
---
M tests/cirrusTest.php
M wmf-config/CirrusSearch-common.php
2 files changed, 309 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/mediawiki-config refs/changes/50/307350/1

diff --git a/tests/cirrusTest.php b/tests/cirrusTest.php
index 06268d5..7772c42 100644
--- a/tests/cirrusTest.php
+++ b/tests/cirrusTest.php
@@ -101,6 +101,9 @@
                // variables that would have been setup elsewhere, perhaps in 
mediawiki
                // default settings or by CommonSettings.php, or by 
CirrusSearch.php,
                // but none of those are a part of this repository
+               $wgCirrusSearchRescoreProfiles = array();
+               $wgCirrusSearchRescoreFunctionScoreChains = array();
+               $wgCirrusSearchFullTextQueryBuilderProfiles = array();
                $wgJobTypeConf = array( 'default' => array() );
                $wgCirrusSearchWeights = array();
                $wgCirrusSearchNamespaceWeights = array();
diff --git a/wmf-config/CirrusSearch-common.php 
b/wmf-config/CirrusSearch-common.php
index e20cbea..0b73b1c 100644
--- a/wmf-config/CirrusSearch-common.php
+++ b/wmf-config/CirrusSearch-common.php
@@ -33,6 +33,312 @@
 # Enable user testing
 $wgCirrusSearchUserTesting = $wmgCirrusSearchUserTesting;
 
+# BM25 A/B test, enabled only on enwiki to avoid conflicts
+# with TextCat language detection
+if ( $wgDBname === 'enwiki' ) {
+# UserTesting requires that a var exists in $GLOBALS before setting it
+# All extra vars needed to customize rescore weights
+       $wgCirrusSearchPageViewsW = 1.0;
+       $wgCirrusSearchPageViewsK = 1.0;
+       $wgCirrusSearchPageViewsA = 1.0;
+       $wgCirrusSearchIncLinksW = 1.0;
+       $wgCirrusSearchIncLinksK = 1.0;
+       $wgCirrusSearchIncLinksA = 1.0;
+       $wgCirrusSearchIncLinksAloneW = 1.0;
+       $wgCirrusSearchIncLinksAloneK = 1.0;
+       $wgCirrusSearchIncLinksAloneA = 1.0;
+
+       $wgCirrusSearchUserTesting['bm25'] = [
+               'sampleRate' => 0,
+               'globals' => [
+                       'wgCirrusSearchBoostTemplates' => [],
+                       'wgCirrusSearchRescoreProfiles' => 
$wgCirrusSearchRescoreProfiles + [
+                               'wsum_inclinks' => [
+                                       'supported_namespaces' => 'all',
+                                       'rescore' => [
+                                               [
+                                                       'window' => 8192,
+                                                       'window_size_override' 
=> 'CirrusSearchFunctionRescoreWindowSize',
+                                                       'query_weight' => 1.0,
+                                                       'rescore_query_weight' 
=> 1.0,
+                                                       'score_mode' => 'total',
+                                                       'type' => 
'function_score',
+                                                       'function_chain' => 
'wsum_inclinks'
+                                               ],
+                                               [
+                                                       'window' => 8192,
+                                                       'window_size_override' 
=> 'CirrusSearchFunctionRescoreWindowSize',
+                                                       'query_weight' => 1.0,
+                                                       'rescore_query_weight' 
=> 1.0,
+                                                       'score_mode' => 
'multiply',
+                                                       'type' => 
'function_score',
+                                                       'function_chain' => 
'optional_chain'
+                                               ],
+                                       ],
+                               ],
+                               'wsum_inclinks_pv' => [
+                                       'supported_namespaces' => 'content',
+                                       'fallback_profile' => 'wsum_inclinks',
+                                       'rescore' => [
+                                               [
+                                                       'window' => 8192,
+                                                       'window_size_override' 
=> 'CirrusSearchFunctionRescoreWindowSize',
+                                                       'query_weight' => 1.0,
+                                                       'rescore_query_weight' 
=> 1.0,
+                                                       'score_mode' => 'total',
+                                                       'type' => 
'function_score',
+                                                       'function_chain' => 
'wsum_inclinks_pv'
+                                               ],
+                                               [
+                                                       'window' => 8192,
+                                                       'window_size_override' 
=> 'CirrusSearchFunctionRescoreWindowSize',
+                                                       'query_weight' => 1.0,
+                                                       'rescore_query_weight' 
=> 1.0,
+                                                       'score_mode' => 
'multiply',
+                                                       'type' => 
'function_score',
+                                                       'function_chain' => 
'optional_chain'
+                                               ],
+                                       ],
+                               ],
+                       ],
+                       'wgCirrusSearchRescoreFunctionScoreChains' => 
$wgCirrusSearchRescoreFunctionScoreChains + [
+                               'wsum_inclinks' => [
+                                       'functions' => [
+                                               [
+                                                       'type' => 'satu',
+                                                       'weight' => [
+                                                               'value' => 1.2,
+                                                               
'config_override' => 'CirrusSearchIncLinksAloneW',
+                                                               
'uri_param_override' => 'cirrusIncLinksAloneW',
+                                                       ],
+                                                       'params' => [
+                                                               'field' => 
'incoming_links',
+                                                               'k' => [
+                                                                       'value' 
=> 10,
+                                                                       
'config_override' => 'CirrusSearchIncLinksAloneK',
+                                                                       
'uri_param_override' => 'cirrusIncLinksAloneK',
+                                                               ],
+                                                               'a' => [
+                                                                       'value' 
=> 1,
+                                                                       
'config_override' => 'CirrusSearchIncLinksAloneA',
+                                                                       
'uri_param_override' => 'cirrusIncLinksAloneA',
+                                                               ]
+                                                       ],
+                                               ],
+                                       ],
+                               ],
+                               'wsum_inclinks_pv' => [
+                                       'score_mode' => 'sum',
+                                       'boost_mode' => 'sum',
+                                       'functions' => [
+                                               [
+                                                       'type' => 'satu',
+                                                       'weight' => [
+                                                               'value' => 1.8,
+                                                               
'config_override' => 'CirrusSearchPageViewsW',
+                                                               
'uri_param_override' => 'cirrusPageViewsW',
+                                                       ],
+                                                       'params' => [
+                                                               'field' => 
'popularity_score',
+                                                               'k' => [
+                                                                       'value' 
=> 0.0000007,
+                                                                       
'config_override' => 'CirrusSearchPageViewsK',
+                                                                       
'uri_param_override' => 'cirrusPageViewsK',
+                                                               ],
+                                                               'a' => [
+                                                                       'value' 
=> 1,
+                                                                       
'config_override' => 'CirrusSearchPageViewsA',
+                                                                       
'uri_param_override' => 'cirrusPageViewsA',
+                                                               ],
+                                                       ],
+                                               ],
+                                               [
+                                                       'type' => 'satu',
+                                                       'weight' => [
+                                                               'value' => 0.6,
+                                                               
'config_override' => 'CirrusSearchIncLinksW',
+                                                               
'uri_param_override' => 'cirrusIncLinkssW',
+                                                       ],
+                                                       'params' => [
+                                                               'field' => 
'incoming_links',
+                                                               'k' => [
+                                                                       'value' 
=> 10,
+                                                                       
'config_override' => 'CirrusSearchIncLinksK',
+                                                                       
'uri_param_override' => 'cirrusIncLinksK',
+                                                               ],
+                                                               'a' => [
+                                                                       'value' 
=> 1,
+                                                                       
'config_override' => 'CirrusSearchIncLinksA',
+                                                                       
'uri_param_override' => 'cirrusIncLinksA',
+                                                               ],
+                                                       ],
+                                               ],
+                                       ],
+                               ],
+                       ],
+                       'wgCirrusSearchFullTextQueryBuilderProfiles' => 
$wgCirrusSearchFullTextQueryBuilderProfiles + [
+                               'perfield_builder' => [
+                                       'builder_class' => 
\CirrusSearch\Query\FullTextSimpleMatchQueryBuilder::class,
+                                       'settings' => [
+                                               'default_min_should_match' => 
'1',
+                                               'default_query_type' => 
'most_fields',
+                                               'default_stem_weight' => 3.0,
+                                               'fields' => [
+                                                       'title' => 0.3,
+                                                       'redirect.title' => [
+                                                               'boost' => 0.27,
+                                                               'in_dismax' => 
'redirects_or_shingles'
+                                                       ],
+                                                       'suggest' => [
+                                                               'is_plain' => 
true,
+                                                               'boost' => 0.20,
+                                                               'in_dismax' => 
'redirects_or_shingles',
+                                                       ],
+                                                       'category' => 0.05,
+                                                       'heading' => 0.05,
+                                                       'text' => [
+                                                               'boost' => 0.6,
+                                                               'in_dismax' => 
'text_and_opening_text',
+                                                       ],
+                                                       'opening_text' => [
+                                                               'boost' => 0.5,
+                                                               'in_dismax' => 
'text_and_opening_text',
+                                                       ],
+                                                       'auxiliary_text' => 
0.05,
+                                                       'file_text' => 0.5,
+                                               ],
+                                               'phrase_rescore_fields' => [
+                                                       // very low (don't 
forget it's multiplied by 10 by default)
+                                                       // Use the all field to 
avoid loading positions on another field,
+                                                       // score is roughly the 
same when used on text
+                                                       'all' => 0.03,
+                                                       'all.plain' => 0.05,
+                                               ],
+                                       ],
+                               ],
+                       ],
+               ],
+               'buckets' => [
+                       // Prod settings on eqiad
+                       // nDCG@5 0.2772 (enwiki scores excluded)
+                       'control' => [
+                               'trigger' => 'bm25:control',
+                               'globals' => [
+                                       
'wgCirrusSearchFullTextQueryBuilderProfile' => 'default',
+                                       'wgCirrusSearchExtraBackendLatency' => 
30000,
+                               ],
+                       ],
+                       // BM25+allfield and QueryString, inclinks as a sum
+                       // nDCG@5 0.2689 (enwiki scores excluded)
+                       'bm25_allfield' => [
+                               'trigger' => 'bm25:allfield',
+                               'globals' => [
+                                       'wgCirrusSearchDefaultCluster' => 
'codfw',
+                                       
'wgCirrusSearchFullTextQueryBuilderProfile' => 'default',
+                                       
'wgCirrusSearchPhraseSuggestReverseField' => [
+                                               'build' => true,
+                                               'use' => false,
+                                       ],
+                                       
'wgCirrusSearchIgnoreOnWikiBoostTemplates' => true,
+                                       // set only here because only needed 
for reindexing
+                                       'wgCirrusSearchSimilarityProfile' => [
+                                               'similarity' => [
+                                                       'arrays' => [
+                                                               'type' => 
'BM25',
+                                                               'k1' => 1.2,
+                                                               'b' => 0.3,
+                                                       ],
+                                                       'default' => [
+                                                               'type' => 
'BM25',
+                                                               'k1' => 1.2,
+                                                               'b' => 0.75,
+                                                       ],
+                                               ],
+                                               'fields' => [
+                                                       '__default__' => 
'default',
+                                                       'category' => 'arrays',
+                                                       'heading' => 'arrays',
+                                                       'redirect' => 'arrays',
+                                                       'suggest' => 'arrays',
+                                               ],
+                                       ],
+                                       'wgCirrusSearchRescoreProfile' => 
'wsum_inclinks',
+                                       'wgCirrusSearchIncLinksAloneW' => 1.3,
+                                       'wgCirrusSearchIncLinksAloneK' => 30,
+                                       'wgCirrusSearchIncLinksAloneA' => 0.7,
+                               ]
+                       ],
+                       // BM25, perfield and SimpleMatch Query builder, 
inclinks as a sum
+                       // nDCG@5 0.3371 (enwiki scores excluded)
+                       'bm25_inclinks' => [
+                               'trigger' => 'bm25:inclinks',
+                               'globals' => [
+                                       'wgCirrusSearchDefaultCluster' => 
'codfw',
+                                       
'wgCirrusSearchFullTextQueryBuilderProfile' => 'perfield_builder',
+                                       
'wgCirrusSearchIgnoreOnWikiBoostTemplates' => true,
+                                       
'wgCirrusSearchPhraseSuggestReverseField' => [
+                                               'build' => true,
+                                               'use' => false,
+                                       ],
+                                       'wgCirrusSearchRescoreProfile' => 
'wsum_inclinks',
+                                       'wgCirrusSearchIncLinksAloneW' => 6.5,
+                                       'wgCirrusSearchIncLinksAloneK' => 30,
+                                       'wgCirrusSearchIncLinksAloneA' => 0.7,
+                               ]
+                       ],
+                       // BM25, perfield and SimpleMatch Query builder, 
inclinks+pop score as a sum
+                       // nDCG@5 0.3368 (enwiki scores excluded)
+                       'bm25_inclinks_pv' => [
+                               'trigger' => 'bm25:inclinks_pv',
+                               'globals' => [
+                                       'wgCirrusSearchDefaultCluster' => 
'codfw',
+                                       
'wgCirrusSearchFullTextQueryBuilderProfile' => 'perfield_builder',
+                                       
'wgCirrusSearchIgnoreOnWikiBoostTemplates' => true,
+                                       
'wgCirrusSearchPhraseSuggestReverseField' => [
+                                               'build' => true,
+                                               'use' => false,
+                                       ],
+                                       'wgCirrusSearchRescoreProfile' => 
'wsum_inclinks_pv',
+                                       'wgCirrusSearchPageViewsW' => 1.5,
+                                       'wgCirrusSearchPageViewsK' => 8E-6,
+                                       'wgCirrusSearchPageViewsA' => 0.8,
+                                       'wgCirrusSearchIncLinksW' => 5.0,
+                                       'wgCirrusSearchIncLinksK' => 30,
+                                       'wgCirrusSearchIncLinksA' => 0.7,
+                                       'wgCirrusSearchIncLinksAloneW' => 6.5,
+                                       'wgCirrusSearchIncLinksAloneK' => 30,
+                                       'wgCirrusSearchIncLinksAloneA' => 0.7,
+                               ]
+                       ],
+                       // BM25, perfield and SimpleMatch Query builder, 
inclinks+pop score as a sum
+                       // nDCG@5 0.3368 (enwiki scores excluded)
+                       // Reverse field enabled for DYM
+                       'bm25_inclinks_pv_rev' => [
+                               'trigger' => 'bm25:inclinks_pv_rev',
+                               'globals' => [
+                                       'wgCirrusSearchDefaultCluster' => 
'codfw',
+                                       
'wgCirrusSearchFullTextQueryBuilderProfile' => 'perfield_builder',
+                                       
'wgCirrusSearchPhraseSuggestReverseField' => [
+                                               'build' => true,
+                                               'use' => true,
+                                       ],
+                                       
'wgCirrusSearchIgnoreOnWikiBoostTemplates' => true,
+                                       'wgCirrusSearchPageViewsW' => 1.5,
+                                       'wgCirrusSearchPageViewsK' => 8E-6,
+                                       'wgCirrusSearchPageViewsA' => 0.8,
+                                       'wgCirrusSearchIncLinksW' => 5.0,
+                                       'wgCirrusSearchIncLinksK' => 30,
+                                       'wgCirrusSearchIncLinksA' => 0.7,
+                                       'wgCirrusSearchRescoreProfile' => 
'wsum_inclinks_pv',
+                                       'wgCirrusSearchIncLinksAloneW' => 6.5,
+                                       'wgCirrusSearchIncLinksAloneK' => 30,
+                                       'wgCirrusSearchIncLinksAloneA' => 0.7,
+                               ],
+                       ],
+               ],
+       ];
+}
+
 # Turn off leading wildcard matches, they are a very slow and inefficient query
 $wgCirrusSearchAllowLeadingWildcard = false;
 

-- 
To view, visit https://gerrit.wikimedia.org/r/307350
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia26197e9b7c6e496d8d7e21b629bb0725974286b
Gerrit-PatchSet: 1
Gerrit-Project: operations/mediawiki-config
Gerrit-Branch: master
Gerrit-Owner: Thcipriani <tcipri...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to