DCausse has uploaded a new change for review. https://gerrit.wikimedia.org/r/305394
Change subject: Adjust browser test settings ...................................................................... Adjust browser test settings Current settings seem to be too fragile and can cause cindy to fail unexpectedly. This patch tries to reduce the chances for this to happen by lowering the norms impact and adjusting field boosts. Change-Id: I2d12b3103afb2c08d470563c766d09105f6b0905 --- M profiles/FullTextQueryBuilderProfiles.config.php M profiles/SimilarityProfiles.php M tests/browser/features/relevancy_api.feature 3 files changed, 19 insertions(+), 9 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/94/305394/1 diff --git a/profiles/FullTextQueryBuilderProfiles.config.php b/profiles/FullTextQueryBuilderProfiles.config.php index 34f1b0c..b9f0ae8 100644 --- a/profiles/FullTextQueryBuilderProfiles.config.php +++ b/profiles/FullTextQueryBuilderProfiles.config.php @@ -59,10 +59,9 @@ 'boost' => 2.1, 'in_dismax' => 'redirects_or_shingles', ), - // Very high category weight still unclear why such - // high boost is needed for tests/browser/features/relevancy_api.feature - 'category' => 3.5, - 'heading' => 2.3, + // category should win over heading/opening + 'category' => 1.8, + 'heading' => 1.5, // Pack text and opening_text in a dismax query // this is to avoid scoring twice the same words 'text' => array( @@ -70,7 +69,7 @@ 'in_dismax' => 'text_and_opening_text', ), 'opening_text' => array( - 'boost' => 0.5, + 'boost' => 0.7, 'in_dismax' => 'text_and_opening_text', ), 'auxiliary_text' => 0.2, diff --git a/profiles/SimilarityProfiles.php b/profiles/SimilarityProfiles.php index 7edc866..1fc650e 100644 --- a/profiles/SimilarityProfiles.php +++ b/profiles/SimilarityProfiles.php @@ -76,8 +76,11 @@ ), 'bm25_browser_tests' => array( 'similarity' => array( - // Lower norms impact for redirects data - 'suggest_and_redirects' => array( + // Lower norms impact, cirrustestwiki is not well + // balanced with many small docs 
without opening nor + // heading resulting in very low avg field length + // on such fields + 'lower_norms' => array( 'type' => 'BM25', 'k1' => 1.2, 'b' => 0.3, @@ -88,8 +91,11 @@ ), 'fields' => array( '__default__' => 'with_defaults', - 'suggest' => 'suggest_and_redirects', - 'redirect' => 'suggest_and_redirects', + 'suggest' => 'lower_norms', + 'redirect' => 'lower_norms', + 'opening_text' => 'lower_norms', + 'heading' => 'lower_norms', + 'category' => 'lower_norms', ), ), ); diff --git a/tests/browser/features/relevancy_api.feature b/tests/browser/features/relevancy_api.feature index 88ce0fb..e1223f2 100644 --- a/tests/browser/features/relevancy_api.feature +++ b/tests/browser/features/relevancy_api.feature @@ -36,6 +36,11 @@ # 2. Its quite possible for the second result to be deeper in the result list for a few seconds after the pages are # created. It gets its position updated by the link counting job which has to wait for refreshing and undelaying. + # Last two tests use "sixth or seventh" because the current implementation of the all field + # and the copy_to hack will copy the content only one time for both text and auxiliary_text + # auxiliary_text is set to 0.5 but will be approximated to 1 (similar to text) + # phrase freq will be identical for both fields making length norms the sole discriminating + # criteria. 
Scenario: Results are sorted based on what part of the page matches: title, redirect, category, etc When I api search with query independent profile classic_noboostlinks for Relevancytest Then Relevancytest is the first api search result -- To view, visit https://gerrit.wikimedia.org/r/305394 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I2d12b3103afb2c08d470563c766d09105f6b0905 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: DCausse <dcau...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits