jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/377498 )
Change subject: Put default search namespaces in content index ...................................................................... Put default search namespaces in content index Part of the benefit of splitting our data between content and general indices, besides frequency counts, is offering better performance by querying a much smaller dataset. A variety of wikis though may configure their default search namespaces to include more than just content namespaces. Adjust our handling so all namespaces searched by default are included in the content index. After deployment this will require a run of the saneitizer on all wikis that have additional namespaces in their wgNamespacesToBeSearchedDefault. Change-Id: Iba8b8e1f204958ccdf2cab562dc34e0008fe97ea --- M includes/Api/ConfigDump.php M includes/Connection.php M includes/Search/RescoreBuilders.php M includes/SearchConfig.php M tests/unit/ConnectionTest.php M tests/unit/RescoreBuilderTest.php M tests/unit/SearcherTest.php 7 files changed, 49 insertions(+), 15 deletions(-) Approvals: Smalyshev: Looks good to me, approved Cindy-the-browser-test-bot: Looks good to me, but someone else must approve jenkins-bot: Verified DCausse: Looks good to me, but someone else must approve diff --git a/includes/Api/ConfigDump.php b/includes/Api/ConfigDump.php index 14dd3ef..aba3420 100644 --- a/includes/Api/ConfigDump.php +++ b/includes/Api/ConfigDump.php @@ -122,6 +122,7 @@ 'CirrusSearchMaxPhraseTokens', 'LanguageCode', 'ContentNamespaces', + 'NamespacesToBeSearchedDefault', ]; public function execute() { diff --git a/includes/Connection.php b/includes/Connection.php index 128985d..c2a4da0 100644 --- a/includes/Connection.php +++ b/includes/Connection.php @@ -265,6 +265,10 @@ if ( isset( $mappings[$namespace] ) ) { return $mappings[$namespace]; } + $defaultSearch = $this->config->get( 'NamespacesToBeSearchedDefault' ); + if ( isset( $defaultSearch[$namespace] ) && $defaultSearch[$namespace] ) { + return self::CONTENT_INDEX_TYPE; + } return MWNamespace::isContent( $namespace ) ? self::CONTENT_INDEX_TYPE : self::GENERAL_INDEX_TYPE; @@ -282,14 +286,29 @@ } $mappings = $this->config->get( 'CirrusSearchNamespaceMappings' ); - $count = count( array_keys( $mappings, $indexType ) ); + $inIndexType = []; + foreach ( $mappings as $ns => $type ) { + if ( $indexType === $type ) { + $inIndexType[$ns] = true; + } + } if ( $indexType === self::CONTENT_INDEX_TYPE ) { // The content namespace includes everything set in the mappings to content (count right now) // Plus everything in wgContentNamespaces that isn't already in namespace mappings $contentNamespaces = $this->config->get( 'ContentNamespaces' ); - $count += count( array_diff( $contentNamespaces, array_keys( $mappings ) ) ); + foreach ( $contentNamespaces as $ns ) { + if ( !isset( $mappings[$ns] ) ) { + $inIndexType[$ns] = true; + } + } + $defaultSearch = $this->config->get( 'NamespacesToBeSearchedDefault' ); + foreach ( $defaultSearch as $ns => $shouldSearch ) { + if ( $shouldSearch && !isset( $mappings[$ns] ) ) { + $inIndexType[$ns] = true; + } + } } - return $count; + return count( $inIndexType ); } /** diff --git a/includes/Search/RescoreBuilders.php b/includes/Search/RescoreBuilders.php index 2650c52..16f5fbc 100644 --- a/includes/Search/RescoreBuilders.php +++ b/includes/Search/RescoreBuilders.php @@ -222,6 +222,13 @@ return true; case 'content': $profileNs = $this->context->getConfig()->get( 'ContentNamespaces' ); + // Default search namespaces are also considered content + $defaultSearch = $this->context->getConfig()->get( 'NamespacesToBeSearchedDefault' ); + foreach ( $defaultSearch as $ns => $isDefault ) { + if ( $isDefault ) { + $profileNs[] = $ns; + } + } break; default: throw new InvalidRescoreProfileException( "Invalid rescore profile: supported_namespaces should be 'all', 'content' or an array of namespaces" ); diff --git a/includes/SearchConfig.php b/includes/SearchConfig.php index 4d49ddd..a44443c 100644 --- a/includes/SearchConfig.php +++ b/includes/SearchConfig.php @@ -20,6 +20,7 @@ private static $nonCirrusVars = [ 'wgLanguageCode', 'wgContentNamespaces', + 'wgNamespacesToBeSearchedDefault', ]; /** diff --git a/tests/unit/ConnectionTest.php b/tests/unit/ConnectionTest.php index 99f3e43..a2ee516 100644 --- a/tests/unit/ConnectionTest.php +++ b/tests/unit/ConnectionTest.php @@ -26,10 +26,11 @@ /** * @dataProvider provideNamespacesInIndexType */ - public function testNamespacesInIndexType( $contentNamespaces, $namespaceMappings, $indexType, $expected ) { + public function testNamespacesInIndexType( $contentNamespaces, $defaultSearchNamespaces, $namespaceMappings, $indexType, $expected ) { $config = new HashSearchConfig( [ 'ContentNamespaces' => $contentNamespaces, 'CirrusSearchNamespaceMappings' => $namespaceMappings, + 'NamespacesToBeSearchedDefault' => $defaultSearchNamespaces, ], [ 'inherit' ] ); $conn = new Connection( $config ); $this->assertEquals( $expected, $conn->namespacesInIndexType( $indexType ) ); @@ -38,20 +39,24 @@ public static function provideNamespacesInIndexType() { return [ // Standard: - [ [ NS_MAIN ], [], 'content', 1 ], - [ [ NS_MAIN ], [], 'general', false ], + [ [ NS_MAIN ], [ NS_MAIN => true ], [], 'content', 1 ], + [ [ NS_MAIN ], [ NS_MAIN => true ], [], 'general', false ], // Commons: - [ [ NS_MAIN ], [ NS_FILE => 'file' ], 'file', 1 ], + [ [ NS_MAIN ], [ NS_MAIN => true ], [ NS_FILE => 'file' ], 'file', 1 ], // Funky: - [ [ NS_MAIN ], [ NS_FILE => 'file', NS_FILE_TALK => 'file' ], 'file', 2 ], - [ [ NS_MAIN ], [ NS_FILE => 'file', NS_FILE_TALK => 'file' ], 'conent', false ], - [ [ NS_MAIN, NS_FILE ], [], 'content', 2 ], - [ [ NS_MAIN, NS_FILE ], [ NS_FILE => 'file' ], 'file', 1 ], - [ [ NS_MAIN, NS_FILE ], [ NS_FILE => 'file' ], 'content', 1 ], - [ [ NS_MAIN, NS_FILE, NS_FILE_TALK ], [ NS_FILE => 'file' ], 'content', 2 ], - [ [ NS_MAIN, NS_FILE, NS_FILE_TALK ], [], 'content', 3 ], + [ [ NS_MAIN ], [ NS_MAIN => true ], [ NS_FILE => 'file', NS_FILE_TALK => 'file' ], 'file', 2 ], + [ [ NS_MAIN ], [ NS_MAIN => true ], [ NS_FILE => 'file', NS_FILE_TALK => 'file' ], 'conent', false ], + [ [ NS_MAIN, NS_FILE ], [ NS_MAIN => true ], [], 'content', 2 ], + [ [ NS_MAIN, NS_FILE ], [ NS_MAIN => true ], [ NS_FILE => 'file' ], 'file', 1 ], + [ [ NS_MAIN, NS_FILE ], [ NS_MAIN => true ], [ NS_FILE => 'file' ], 'content', 1 ], + [ [ NS_MAIN, NS_FILE, NS_FILE_TALK ], [ NS_MAIN => true ], [ NS_FILE => 'file' ], 'content', 2 ], + [ [ NS_MAIN, NS_FILE, NS_FILE_TALK ], [ NS_MAIN => true ], [], 'content', 3 ], + [ [ NS_MAIN ], [ NS_MAIN => true, NS_FILE => true ], [ NS_FILE => 'file' ], 'content', 1 ], + [ [ NS_MAIN ], [ NS_MAIN => true, NS_FILE => true ], [ NS_FILE => 'file' ], 'file', 1 ], + [ [ NS_MAIN, NS_FILE ], [ NS_MAIN => true, NS_FILE => true ], [ NS_FILE => 'file' ], 'content', 1 ], + [ [ NS_MAIN, NS_FILE ], [ NS_MAIN => true, NS_FILE => true ], [ NS_FILE => 'file' ], 'file', 1 ], ]; } diff --git a/tests/unit/RescoreBuilderTest.php b/tests/unit/RescoreBuilderTest.php index 28730b4..d4abb56 100644 --- a/tests/unit/RescoreBuilderTest.php +++ b/tests/unit/RescoreBuilderTest.php @@ -265,6 +265,7 @@ ]; $profile = [ 'ContentNamespaces' => [ 1, 2 ], + 'NamespacesToBeSearchedDefault' => [ 1 => true ], 'CirrusSearchRescoreProfiles' => [ 'full' => [ 'supported_namespaces' => [ 0, 1 ], diff --git a/tests/unit/SearcherTest.php b/tests/unit/SearcherTest.php index 182eaaf..9795468 100644 --- a/tests/unit/SearcherTest.php +++ b/tests/unit/SearcherTest.php @@ -165,7 +165,7 @@ } } $this->assertEmpty( $notInApi, implode( ',', $notInApi ) . " are exported from \CirrusSearch\Api\ConfigDump" ); - $this->assertEmpty( $notInSearchConfig, implode( ',', $notInApi ) . " are allowed in SearchConfig::getNonCirrusConfigVarNames()" ); + $this->assertEmpty( $notInSearchConfig, implode( ',', $notInSearchConfig ) . " are allowed in SearchConfig::getNonCirrusConfigVarNames()" ); } finally { SearchConfigUsageDecorator::resetUsedConfigKeys(); } -- To view, visit https://gerrit.wikimedia.org/r/377498 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Iba8b8e1f204958ccdf2cab562dc34e0008fe97ea Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: Cindy-the-browser-test-bot <bernhardsone...@gmail.com> Gerrit-Reviewer: DCausse <dcau...@wikimedia.org> Gerrit-Reviewer: Gehel <guillaume.leder...@wikimedia.org> Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org> Gerrit-Reviewer: Tjones <tjo...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits