Thcipriani has uploaded a new change for review. https://gerrit.wikimedia.org/r/307348
Change subject: Revert "CirrusSearch BM25 A/B test config" ...................................................................... Revert "CirrusSearch BM25 A/B test config" This reverts commit bac44298d7a08949ad52d1dedf5b793a6f94e5e8. Change-Id: I1a9e38141578ce7fb8739f6579122c5cfb518aa9 --- M tests/cirrusTest.php M wmf-config/CirrusSearch-common.php 2 files changed, 0 insertions(+), 309 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/operations/mediawiki-config refs/changes/48/307348/1 diff --git a/tests/cirrusTest.php b/tests/cirrusTest.php index 7772c42..06268d5 100644 --- a/tests/cirrusTest.php +++ b/tests/cirrusTest.php @@ -101,9 +101,6 @@ // variables that would have been setup elsewhere, perhaps in mediawiki // default settings or by CommonSettings.php, or by CirrusSearch.php, // but none of those are a part of this repository - $wgCirrusSearchRescoreProfiles = array(); - $wgCirrusSearchRescoreFunctionScoreChains = array(); - $wgCirrusSearchFullTextQueryBuilderProfiles = array(); $wgJobTypeConf = array( 'default' => array() ); $wgCirrusSearchWeights = array(); $wgCirrusSearchNamespaceWeights = array(); diff --git a/wmf-config/CirrusSearch-common.php b/wmf-config/CirrusSearch-common.php index 0b73b1c..e20cbea 100644 --- a/wmf-config/CirrusSearch-common.php +++ b/wmf-config/CirrusSearch-common.php @@ -33,312 +33,6 @@ # Enable user testing $wgCirrusSearchUserTesting = $wmgCirrusSearchUserTesting; -# BM25 A/B test, enabled only on enwiki to avoid conflicts with -# with TextCat language detection -if ( $wgDBname === 'enwiki' ) { -# UserTesting requires that a var exists in $GLOBALS before setting it -# All extra vars needed to customize rescore weights - $wgCirrusSearchPageViewsW = 1.0; - $wgCirrusSearchPageViewsK = 1.0; - $wgCirrusSearchPageViewsA = 1.0; - $wgCirrusSearchIncLinksW = 1.0; - $wgCirrusSearchIncLinksK = 1.0; - $wgCirrusSearchIncLinksA = 1.0; - $wgCirrusSearchIncLinksAloneW = 1.0; - $wgCirrusSearchIncLinksAloneK = 1.0; - $wgCirrusSearchIncLinksAloneA = 1.0; - - $wgCirrusSearchUserTesting['bm25'] = [ - 'sampleRate' => 0, - 'globals' => [ - 'wgCirrusSearchBoostTemplates' => [], - 'wgCirrusSearchRescoreProfiles' => $wgCirrusSearchRescoreProfiles + [ - 'wsum_inclinks' => [ - 'supported_namespaces' => 'all', - 'rescore' => [ - [ - 'window' => 8192, - 'window_size_override' => 'CirrusSearchFunctionRescoreWindowSize', - 'query_weight' => 1.0, - 'rescore_query_weight' => 1.0, - 'score_mode' => 'total', - 'type' => 'function_score', - 'function_chain' => 'wsum_inclinks' - ], - [ - 'window' => 8192, - 'window_size_override' => 'CirrusSearchFunctionRescoreWindowSize', - 'query_weight' => 1.0, - 'rescore_query_weight' => 1.0, - 'score_mode' => 'multiply', - 'type' => 'function_score', - 'function_chain' => 'optional_chain' - ], - ], - ], - 'wsum_inclinks_pv' => [ - 'supported_namespaces' => 'content', - 'fallback_profile' => 'wsum_inclinks', - 'rescore' => [ - [ - 'window' => 8192, - 'window_size_override' => 'CirrusSearchFunctionRescoreWindowSize', - 'query_weight' => 1.0, - 'rescore_query_weight' => 1.0, - 'score_mode' => 'total', - 'type' => 'function_score', - 'function_chain' => 'wsum_inclinks_pv' - ], - [ - 'window' => 8192, - 'window_size_override' => 'CirrusSearchFunctionRescoreWindowSize', - 'query_weight' => 1.0, - 'rescore_query_weight' => 1.0, - 'score_mode' => 'multiply', - 'type' => 'function_score', - 'function_chain' => 'optional_chain' - ], - ], - ], - ], - 'wgCirrusSearchRescoreFunctionScoreChains' => $wgCirrusSearchRescoreFunctionScoreChains + [ - 'wsum_inclinks' => [ - 'functions' => [ - [ - 'type' => 'satu', - 'weight' => [ - 'value' => 1.2, - 'config_override' => 'CirrusSearchIncLinksAloneW', - 'uri_param_override' => 'cirrusIncLinksAloneW', - ], - 'params' => [ - 'field' => 'incoming_links', - 'k' => [ - 'value' => 10, - 'config_override' => 'CirrusSearchIncLinksAloneK', - 'uri_param_override' => 'cirrusIncLinksAloneK', - ], - 'a' => [ - 'value' => 1, - 'config_override' => 'CirrusSearchIncLinksAloneA', - 'uri_param_override' => 'cirrusIncLinksAloneA', - ] - ], - ], - ], - ], - 'wsum_inclinks_pv' => [ - 'score_mode' => 'sum', - 'boost_mode' => 'sum', - 'functions' => [ - [ - 'type' => 'satu', - 'weight' => [ - 'value' => 1.8, - 'config_override' => 'CirrusSearchPageViewsW', - 'uri_param_override' => 'cirrusPageViewsW', - ], - 'params' => [ - 'field' => 'popularity_score', - 'k' => [ - 'value' => 0.0000007, - 'config_override' => 'CirrusSearchPageViewsK', - 'uri_param_override' => 'cirrusPageViewsK', - ], - 'a' => [ - 'value' => 1, - 'config_override' => 'CirrusSearchPageViewsA', - 'uri_param_override' => 'cirrusPageViewsA', - ], - ], - ], - [ - 'type' => 'satu', - 'weight' => [ - 'value' => 0.6, - 'config_override' => 'CirrusSearchIncLinksW', - 'uri_param_override' => 'cirrusIncLinkssW', - ], - 'params' => [ - 'field' => 'incoming_links', - 'k' => [ - 'value' => 10, - 'config_override' => 'CirrusSearchIncLinksK', - 'uri_param_override' => 'cirrusIncLinksK', - ], - 'a' => [ - 'value' => 1, - 'config_override' => 'CirrusSearchIncLinksA', - 'uri_param_override' => 'cirrusIncLinksA', - ], - ], - ], - ], - ], - ], - 'wgCirrusSearchFullTextQueryBuilderProfiles' => $wgCirrusSearchFullTextQueryBuilderProfiles + [ - 'perfield_builder' => [ - 'builder_class' => \CirrusSearch\Query\FullTextSimpleMatchQueryBuilder::class, - 'settings' => [ - 'default_min_should_match' => '1', - 'default_query_type' => 'most_fields', - 'default_stem_weight' => 3.0, - 'fields' => [ - 'title' => 0.3, - 'redirect.title' => [ - 'boost' => 0.27, - 'in_dismax' => 'redirects_or_shingles' - ], - 'suggest' => [ - 'is_plain' => true, - 'boost' => 0.20, - 'in_dismax' => 'redirects_or_shingles', - ], - 'category' => 0.05, - 'heading' => 0.05, - 'text' => [ - 'boost' => 0.6, - 'in_dismax' => 'text_and_opening_text', - ], - 'opening_text' => [ - 'boost' => 0.5, - 'in_dismax' => 'text_and_opening_text', - ], - 'auxiliary_text' => 0.05, - 'file_text' => 0.5, - ], - 'phrase_rescore_fields' => [ - // very low (don't forget it's multiplied by 10 by default) - // Use the all field to avoid loading positions on another field, - // score is roughly the same when used on text - 'all' => 0.03, - 'all.plain' => 0.05, - ], - ], - ], - ], - ], - 'buckets' => [ - // Prod settings on eqiad - // nDCG@5 0.2772 (enwiki scores excluded) - 'control' => [ - 'trigger' => 'bm25:control', - 'globals' => [ - 'wgCirrusSearchFullTextQueryBuilderProfile' => 'default', - 'wgCirrusSearchExtraBackendLatency' => 30000, - ], - ], - // BM25+allfield and QueryString, inclinks as a sum - // nDCG@5 0.2689 (enwiki scores excluded) - 'bm25_allfield' => [ - 'trigger' => 'bm25:allfield', - 'globals' => [ - 'wgCirrusSearchDefaultCluster' => 'codfw', - 'wgCirrusSearchFullTextQueryBuilderProfile' => 'default', - 'wgCirrusSearchPhraseSuggestReverseField' => [ - 'build' => true, - 'use' => false, - ], - 'wgCirrusSearchIgnoreOnWikiBoostTemplates' => true, - // set only here because only needed for reindexing - 'wgCirrusSearchSimilarityProfile' => [ - 'similarity' => [ - 'arrays' => [ - 'type' => 'BM25', - 'k1' => 1.2, - 'b' => 0.3, - ], - 'default' => [ - 'type' => 'BM25', - 'k1' => 1.2, - 'b' => 0.75, - ], - ], - 'fields' => [ - '__default__' => 'default', - 'category' => 'arrays', - 'heading' => 'arrays', - 'redirect' => 'arrays', - 'suggest' => 'arrays', - ], - ], - 'wgCirrusSearchRescoreProfile' => 'wsum_inclinks', - 'wgCirrusSearchIncLinksAloneW' => 1.3, - 'wgCirrusSearchIncLinksAloneK' => 30, - 'wgCirrusSearchIncLinksAloneA' => 0.7, - ] - ], - // BM25, perfield and SimpleMatch Query builder, inclinks as a sum - // nDCG@5 0.3371 (enwiki scores excluded) - 'bm25_inclinks' => [ - 'trigger' => 'bm25:inclinks', - 'globals' => [ - 'wgCirrusSearchDefaultCluster' => 'codfw', - 'wgCirrusSearchFullTextQueryBuilderProfile' => 'perfield_builder', - 'wgCirrusSearchIgnoreOnWikiBoostTemplates' => true, - 'wgCirrusSearchPhraseSuggestReverseField' => [ - 'build' => true, - 'use' => false, - ], - 'wgCirrusSearchRescoreProfile' => 'wsum_inclinks', - 'wgCirrusSearchIncLinksAloneW' => 6.5, - 'wgCirrusSearchIncLinksAloneK' => 30, - 'wgCirrusSearchIncLinksAloneA' => 0.7, - ] - ], - // BM25, perfield and SimpleMatch Query builder, inclinks+pop score as a sum - // nDCG@5 0.3368 (enwiki scores excluded) - 'bm25_inclinks_pv' => [ - 'trigger' => 'bm25:inclinks_pv', - 'globals' => [ - 'wgCirrusSearchDefaultCluster' => 'codfw', - 'wgCirrusSearchFullTextQueryBuilderProfile' => 'perfield_builder', - 'wgCirrusSearchIgnoreOnWikiBoostTemplates' => true, - 'wgCirrusSearchPhraseSuggestReverseField' => [ - 'build' => true, - 'use' => false, - ], - 'wgCirrusSearchRescoreProfile' => 'wsum_inclinks_pv', - 'wgCirrusSearchPageViewsW' => 1.5, - 'wgCirrusSearchPageViewsK' => 8E-6, - 'wgCirrusSearchPageViewsA' => 0.8, - 'wgCirrusSearchIncLinksW' => 5.0, - 'wgCirrusSearchIncLinksK' => 30, - 'wgCirrusSearchIncLinksA' => 0.7, - 'wgCirrusSearchIncLinksAloneW' => 6.5, - 'wgCirrusSearchIncLinksAloneK' => 30, - 'wgCirrusSearchIncLinksAloneA' => 0.7, - ] - ], - // BM25, perfield and SimpleMatch Query builder, inclinks+pop score as a sum - // nDCG@5 0.3368 (enwiki scores excluded) - // Reverse field enabled for DYM - 'bm25_inclinks_pv_rev' => [ - 'trigger' => 'bm25:inclinks_pv_rev', - 'globals' => [ - 'wgCirrusSearchDefaultCluster' => 'codfw', - 'wgCirrusSearchFullTextQueryBuilderProfile' => 'perfield_builder', - 'wgCirrusSearchPhraseSuggestReverseField' => [ - 'build' => true, - 'use' => true, - ], - 'wgCirrusSearchIgnoreOnWikiBoostTemplates' => true, - 'wgCirrusSearchPageViewsW' => 1.5, - 'wgCirrusSearchPageViewsK' => 8E-6, - 'wgCirrusSearchPageViewsA' => 0.8, - 'wgCirrusSearchIncLinksW' => 5.0, - 'wgCirrusSearchIncLinksK' => 30, - 'wgCirrusSearchIncLinksA' => 0.7, - 'wgCirrusSearchRescoreProfile' => 'wsum_inclinks_pv', - 'wgCirrusSearchIncLinksAloneW' => 6.5, - 'wgCirrusSearchIncLinksAloneK' => 30, - 'wgCirrusSearchIncLinksAloneA' => 0.7, - ], - ], - ], - ]; -} - # Turn off leading wildcard matches, they are a very slow and inefficient query $wgCirrusSearchAllowLeadingWildcard = false; -- To view, visit https://gerrit.wikimedia.org/r/307348 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I1a9e38141578ce7fb8739f6579122c5cfb518aa9 Gerrit-PatchSet: 1 Gerrit-Project: operations/mediawiki-config Gerrit-Branch: master Gerrit-Owner: Thcipriani <tcipri...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits